! python --version
Python 3.11.9
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm, colors
from matplotlib.lines import Line2D
from matplotlib.collections import PatchCollection
from matplotlib import patches
import seaborn as sns
import scipy.sparse
import geopandas as gpd
import scanpy as sc
import anndata as an
import scanorama
from pathlib import Path
import squidpy as sq
import os
import warnings
from shapely.geometry import Polygon, MultiPolygon, Point
from shapely.affinity import affine_transform
import math
sc.logging.print_versions()
----- anndata 0.10.7 scanpy 1.10.0 ----- PIL 10.3.0 annoy NA anyio NA asciitree NA asttokens NA attr 23.1.0 attrs 23.1.0 babel 2.11.0 brotli 1.0.9 certifi 2024.02.02 cffi 1.16.0 charset_normalizer 2.0.4 cloudpickle 3.0.0 colorama 0.4.6 comm 0.2.1 cycler 0.12.1 cython_runtime NA dask 2024.5.0 dask_expr 1.1.0 dask_image 2023.08.1 datashader 0.16.1 datatree 0.0.14 dateutil 2.8.2 debugpy 1.6.7 decorator 5.1.1 defusedxml 0.7.1 docrep 0.3.2 executing 0.8.3 fastjsonschema NA fbpca NA fsspec 2023.6.0 geopandas 0.14.4 h5py 3.11.0 idna 3.7 igraph 0.11.5 imageio 2.34.1 intervaltree NA ipykernel 6.28.0 ipywidgets 8.1.2 jedi 0.18.1 jinja2 3.1.3 joblib 1.4.2 json5 NA jsonschema 4.19.2 jsonschema_specifications NA jupyter_events 0.8.0 jupyter_server 2.10.0 jupyterlab_server 2.25.1 kiwisolver 1.4.5 lazy_loader 0.4 legacy_api_wrap NA leidenalg 0.10.2 llvmlite 0.42.0 markupsafe 2.1.3 matplotlib 3.8.2 matplotlib_inline 0.1.6 matplotlib_scalebar 0.8.1 mpl_toolkits NA msgpack 1.0.8 multipledispatch 0.6.0 multiscale_spatial_image 0.11.2 natsort 8.4.0 nbformat 5.9.2 networkx 3.3 numba 0.59.1 numcodecs 0.12.1 numpy 1.26.3 ome_zarr NA overrides NA packaging 23.2 pandas 2.2.0 param 2.1.0 parso 0.8.3 patsy 0.5.6 pkg_resources NA platformdirs 3.10.0 prometheus_client NA prompt_toolkit 3.0.43 psutil 5.9.0 pure_eval 0.2.2 pyarrow 16.0.0 pycparser 2.21 pyct 0.5.0 pydev_ipython NA pydevconsole NA pydevd 2.9.5 pydevd_file_utils NA pydevd_plugins NA pydevd_tracing NA pygeos 0.14 pygments 2.15.1 pyparsing 3.1.2 pyproj 3.6.1 pythoncom NA pythonjsonlogger NA pytz 2024.1 pywintypes NA referencing NA requests 2.31.0 rfc3339_validator 0.1.4 rfc3986_validator 0.1.1 rich NA rpds NA scanorama 1.7.4 scipy 1.13.0 seaborn 0.13.1 send2trash NA session_info 1.0.0 setuptools 69.5.1 shapely 2.0.4 six 1.16.0 skimage 0.23.2 sklearn 1.4.2 sniffio 1.3.0 socks 1.7.1 sortedcontainers 2.4.0 spatial_image 0.3.0 spatialdata 0.0.15 squidpy 1.4.1 stack_data 0.2.0 statsmodels 0.14.2 tblib 3.0.0 texttable 1.7.0 threadpoolctl 3.5.0 tifffile 2024.5.10 tlz 0.12.1 toolz 0.12.1 tornado 6.3.3 tqdm 4.66.4 traitlets 5.7.1 typing_extensions NA urllib3 1.26.18 validators 0.28.1 wcwidth 0.2.5 websocket 0.58.0 win32api NA win32com NA win32con NA win32trace NA winerror NA xarray 2023.12.0 xarray_dataclasses 1.7.0 xarray_schema 0.0.3 xrspatial 0.4.0 yaml 6.0.1 zarr 2.18.0 zipp NA zmq 25.1.2 ----- IPython 8.20.0 jupyter_client 8.6.0 jupyter_core 5.5.0 jupyterlab 4.0.11 notebook 7.0.8 ----- Python 3.11.9 | packaged by Anaconda, Inc. | (main, Apr 19 2024, 16:40:41) [MSC v.1916 64 bit (AMD64)] Windows-10-10.0.22631-SP0 ----- Session information updated at 2024-12-15 19:41
warnings.simplefilter("ignore")
custom_palette = [ "#a70e62", "#e10e3c", "#f47140", "#f0b932", "#ffd95a", "#c2ed51", "#8ed73d", "#489c73", "#1a8d83",
"#4915be", "#2357d5", "#fed2ff", "#6ecad5", "#BD08F9", '#886fc2', "#e170c0", "#FF33CC", "#e4a9e8"]
# Read the list of genes, 200 spike genes subset
genes_to_include_file = "subset_spikegenes_list.txt"
with open(genes_to_include_file, "r") as file:
genes_to_include = [line.strip() for line in file]
VGN1e1 = late double ridge, VRT-A2a (P1 WT)
VGN1e9 = late double ridge, VRT-A2b (P1 Pol)
VGN1b6 = lemma primordia, VRT-A2a (P1 WT)
VGN1b8 = lemma primordia, VRT-A2b (P1 Pol)
VGN1a6 = terminal spikelet, VRT-A2a (P1 WT)
VGN1a4 = terminal spikelet, VRT-A2b (P1 Pol)
VGN1c2 = carpel extension round, VRT-A2a (P1 WT)
VGN1c3 = carpel extension round, VRT-A2b (P1 Pol)
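For later reference, the sample descriptions above can be captured in a small lookup dictionary (a convenience sketch only; `sample_metadata` is an illustrative name and is not used by the analysis below).
# Sketch: sample ID -> (developmental stage, genotype), summarising the list above
sample_metadata = {
    "VGN1e1": ("late double ridge", "VRT-A2a (P1 WT)"),
    "VGN1e9": ("late double ridge", "VRT-A2b (P1 Pol)"),
    "VGN1b6": ("lemma primordia", "VRT-A2a (P1 WT)"),
    "VGN1b8": ("lemma primordia", "VRT-A2b (P1 Pol)"),
    "VGN1a6": ("terminal spikelet", "VRT-A2a (P1 WT)"),
    "VGN1a4": ("terminal spikelet", "VRT-A2b (P1 Pol)"),
    "VGN1c2": ("carpel extension round", "VRT-A2a (P1 WT)"),
    "VGN1c3": ("carpel extension round", "VRT-A2b (P1 Pol)"),
}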
First I am going to read in my filtered & normalised adata objects for each sample¶
# Define the filenames of the saved AnnData objects
adata_filenames = {
"adata_VGN1a6": "qc/adata_VGN1a6_clean.h5ad",
"adata_VGN1a4": "qc/adata_VGN1a4_clean.h5ad",
"adata_VGN1b6": "qc/adata_VGN1b6_clean.h5ad",
"adata_VGN1b8": "qc/adata_VGN1b8_clean.h5ad",
"adata_VGN1e1": "qc/adata_VGN1e1_clean.h5ad",
"adata_VGN1e9": "qc/adata_VGN1e9_clean.h5ad",
"adata_VGN1c2": "qc/adata_VGN1c2_clean.h5ad",
"adata_VGN1c3": "qc/adata_VGN1c3_clean.h5ad"
}
# Initialize a dictionary to store the loaded AnnData objects
loaded_adata_objects = {}
# Load each file and store it in the dictionary
for name, filename in adata_filenames.items():
loaded_adata_objects[name] = sc.read_h5ad(filename)
print(f"Loaded {name} from {filename}")
# Access the loaded AnnData objects
adata_VGN1a6 = loaded_adata_objects["adata_VGN1a6"]
adata_VGN1a4 = loaded_adata_objects["adata_VGN1a4"]
adata_VGN1b6 = loaded_adata_objects["adata_VGN1b6"]
adata_VGN1b8 = loaded_adata_objects["adata_VGN1b8"]
adata_VGN1e1 = loaded_adata_objects["adata_VGN1e1"]
adata_VGN1e9 = loaded_adata_objects["adata_VGN1e9"]
adata_VGN1c2 = loaded_adata_objects["adata_VGN1c2"]
adata_VGN1c3 = loaded_adata_objects["adata_VGN1c3"]
Loaded adata_VGN1a6 from qc/adata_VGN1a6_clean.h5ad Loaded adata_VGN1a4 from qc/adata_VGN1a4_clean.h5ad Loaded adata_VGN1b6 from qc/adata_VGN1b6_clean.h5ad Loaded adata_VGN1b8 from qc/adata_VGN1b8_clean.h5ad Loaded adata_VGN1e1 from qc/adata_VGN1e1_clean.h5ad Loaded adata_VGN1e9 from qc/adata_VGN1e9_clean.h5ad Loaded adata_VGN1c2 from qc/adata_VGN1c2_clean.h5ad Loaded adata_VGN1c3 from qc/adata_VGN1c3_clean.h5ad
Now I will look at clustering my data by integrating all the samples together with scanorama and performing leiden clustering¶
I am following this documentation: https://scanpy-tutorials.readthedocs.io/en/latest/spatial/integration-scanorama.html#Data-integration
adata_VGN1a6.obs['dataset'] = 'VGN1a6'
adata_VGN1a4.obs['dataset'] = 'VGN1a4'
adata_VGN1b6.obs['dataset'] = 'VGN1b6'
adata_VGN1b8.obs['dataset'] = 'VGN1b8'
adata_VGN1c2.obs['dataset'] = 'VGN1c2'
adata_VGN1c3.obs['dataset'] = 'VGN1c3'
adata_VGN1e1.obs['dataset'] = 'VGN1e1'
adata_VGN1e9.obs['dataset'] = 'VGN1e9'
adatas = [adata_VGN1a6, adata_VGN1a4, adata_VGN1b6, adata_VGN1b8, adata_VGN1c2, adata_VGN1c3, adata_VGN1e1, adata_VGN1e9]
keys = ['VGN1a6', 'VGN1a4', 'VGN1b6', 'VGN1b8', 'VGN1c2', 'VGN1c3', 'VGN1e1', 'VGN1e9']
# Add unique labels for concatenation
#create new AnnData objects and replace adata.X with the Scanorama-transformed cell-by-gene matrix, while keeping the other metadata in adata as well.
#The adata.X (expression score) will be corrected and scaled so that we can look between different samples and concatenate them into one.
adatas_cor = scanorama.correct_scanpy(adatas, return_dimred=True)
# Concatenate corrected datasets
adata_spatial = sc.concat(
adatas_cor,
label="dataset", # Use the 'dataset' column as label
uns_merge="unique",
keys=keys,
index_unique="-",
)
# compute the neighbour graph, UMAP embedding, and Leiden clustering on the integrated data
sc.pp.neighbors(adata_spatial, use_rep="X_scanorama")
sc.tl.umap(adata_spatial)
sc.tl.leiden(adata_spatial, resolution = 1, key_added="clusters", directed=False)
Found 200 genes among all datasets [[0. 0.74560592 0.62879834 0.53725676 0.55078631 0.41609621 0.36939792 0.36073932] [0. 0. 0.59564917 0.51637399 0.6898982 0.57102584 0.19963785 0.18546845] [0. 0. 0. 0.65685809 0.4371547 0.2406768 0.45359891 0.30592734] [0. 0. 0. 0. 0.4717608 0.37399146 0.34314169 0.3486297 ] [0. 0. 0. 0. 0. 0.83118531 0.33001358 0.27597196] [0. 0. 0. 0. 0. 0. 0.51923947 0.46462715] [0. 0. 0. 0. 0. 0. 0. 0.84384959] [0. 0. 0. 0. 0. 0. 0. 0. ]] Processing datasets (6, 7) Processing datasets (4, 5) Processing datasets (0, 1) Processing datasets (1, 4) Processing datasets (2, 3) Processing datasets (0, 2) Processing datasets (1, 2) Processing datasets (1, 5) Processing datasets (0, 4) Processing datasets (0, 3) Processing datasets (5, 6) Processing datasets (1, 3) Processing datasets (3, 4) Processing datasets (5, 7) Processing datasets (2, 6) Processing datasets (2, 4) Processing datasets (0, 5) Processing datasets (3, 5) Processing datasets (0, 6) Processing datasets (0, 7) Processing datasets (3, 7) Processing datasets (3, 6) Processing datasets (4, 6) Processing datasets (2, 7) Processing datasets (4, 7) Processing datasets (2, 5) Processing datasets (1, 6) Processing datasets (1, 7)
# Visualising the clusters we made by making the UMAP and spatial plots before plotting out individually
# Define the keys in the required order
keys = ['VGN1e1', 'VGN1e9', 'VGN1b6', 'VGN1b8', 'VGN1a6', 'VGN1a4', 'VGN1c2', 'VGN1c3']
# Use the default matplotlib style (white background)
plt.style.use('default')
# Create a figure with subplots
fig, axes = plt.subplots(3, 3, figsize=(20, 10)) # 3 rows, 3 columns
# Plot UMAP for "clusters"
sc.pl.umap(
adata_spatial, color="clusters", palette=custom_palette, ax=axes[0, 0], show=False, legend_loc="on data", legend_fontsize=12)
axes[0, 0].set_title("UMAP Clusters")
# Define a dictionary to keep track of colors already added to the legend
legend_colors = {}
# Plot spatial plots for each sample
for i, sample in enumerate(keys):
row, col = divmod(i + 1, 3) # Offset by 1 because axes[0, 0] already holds the UMAP
ax = axes[row, col]
if sample in ["VGN1a6", "VGN1b6", "VGN1e1", "VGN1e9", "VGN1b8", "VGN1a4"]:
adata_sample = adata_spatial[adata_spatial.obs['dataset'] == sample].copy()
# Swap x and y coordinates
adata_sample.obsm["spatial"][:, [0, 1]] = adata_sample.obsm["spatial"][:, [1, 0]]
if sample == "VGN1a6":
# Rotate 90 degrees counterclockwise
adata_sample.obsm["spatial"][:, 0] = adata_sample.obsm["spatial"][:, 0] * -1
elif sample == "VGN1b6":
# Rotate 90 degrees clockwise
adata_sample.obsm["spatial"][:, 1] = adata_sample.obsm["spatial"][:, 1] * -1
elif sample == "VGN1b8":
# Rotate 90 degrees
adata_sample.obsm["spatial"][:, 1] = adata_sample.obsm["spatial"][:, 1] * -1
elif sample == "VGN1a4":
# Rotate 90 degrees
adata_sample.obsm["spatial"][:, 0] = adata_sample.obsm["spatial"][:, 0] * -1
elif sample == "VGN1e1":
adata_sample = adata_spatial[adata_spatial.obs['dataset'] == sample].copy()
# Rotate 180 degrees separately for each axis
adata_sample.obsm["spatial"][:, 0] *= -1
adata_sample.obsm["spatial"][:, 1] *= -1
elif sample == "VGN1e9":
adata_sample = adata_spatial[adata_spatial.obs['dataset'] == sample].copy()
# Rotate 180 degrees separately for each axis
adata_sample.obsm["spatial"][:, 0] *= -1
adata_sample.obsm["spatial"][:, 1] *= -1
sc.pl.spatial(
adata_sample, # Filter data for the current sample
color="clusters",
spot_size=12, # Provide a spot size value here
title=f"Spatial Map for {sample}", # Set a title for each plot
ax=ax, show=False,
)
else:
sc.pl.spatial(
adata_spatial[adata_spatial.obs['dataset'] == sample], # Filter data for the current sample
color="clusters",
spot_size=12, # Provide a spot size value here
title=f"Spatial Map for {sample}", # Set a title for each plot
ax=ax, show=False,
)
# Adjust layout
plt.tight_layout()
# Remove legends from subplots
for ax in axes.flat:
ax.legend().remove()
plt.subplots_adjust(hspace=0.15, wspace=0.15)
# Create a single legend for the entire figure without color indicators
handles, labels = fig.axes[7].get_legend_handles_labels() # Get legend handles and labels from any subplot
# Keep only the first 18 labels (one per cluster)
fig.legend(handles[:18], labels[:18], loc='upper right', bbox_to_anchor=(0.99, 0.99), borderaxespad=0., ncol=2, fontsize='large', markerscale=2)
No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
<matplotlib.legend.Legend at 0x165a66c59d0>
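The axis swaps and sign flips above are just 90° and 180° rotations (up to reflection) applied to the spatial coordinates. Below is a minimal sketch of the idea, assuming the coordinates live in obsm['spatial'] as an (n, 2) array; `rotate90` is an illustrative helper name, not something used elsewhere in this notebook.
# Sketch: rotate 2D spatial coordinates by multiples of 90 degrees.
# One counterclockwise quarter turn maps (x, y) -> (-y, x); applying it twice
# gives the 180-degree rotation (x, y) -> (-x, -y).
def rotate90(coords, k=1):
    out = coords.copy()
    for _ in range(k % 4):
        out = np.column_stack([-out[:, 1], out[:, 0]])
    return out
# Hypothetical usage:
# adata_sample.obsm["spatial"] = rotate90(adata_sample.obsm["spatial"], k=2)  # 180 degrees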
sc.pl.umap(
adata_spatial, color="clusters", palette=custom_palette, show=False, legend_loc="on data", legend_fontsize=12, legend_fontoutline= 1 , size=5)
plt.savefig("leidenclustering/UMAP_VRT_A2a_A2b_200genes_res1_white.svg", dpi=300, bbox_inches='tight', format='svg', transparent=True)
Saving cluster information into hdf5 file¶
#this file contains the same information in the index, I will use this for analysis
file_path = "leidenclustering/VRT_A2a_A2b_leiden_200genes_res1_indexed.h5ad"
adata_spatial.write_h5ad(file_path)
#When I am importing .hdf5 files to the VizGen visualiser I need to remove all of the sample information included in the index
# Rename the index column to only include the numbers before the '-' character
adata_indexing = adata_spatial.copy()
adata_indexing.obs.index = [index.split('-')[0] for index in adata_indexing.obs.index]
# Output path for the re-indexed AnnData object
file_path = "leidenclustering/VRT_A2a_A2b_leiden_200genes_res1.hdf5"
# Save the AnnData object as an HDF5 file
adata_indexing.write_h5ad(file_path)
Creating a table with each cell and their domain assignment¶
Supplemental Table 9: Cell ID, domain assignment, sample ID
cluster_info = adata_spatial.obs['clusters'].copy()
cluster_assignment_df = cluster_info.reset_index()
cluster_assignment_df[['cell_id', 'sample']] = cluster_assignment_df['index'].str.split('-', expand=True)
cluster_assignment_df.drop(columns=['index'], inplace=True)
cluster_assignment_df.to_csv("leidenclustering/DomainAssignment_PerCell.csv", index=False)
cluster_assignment_df
| | clusters | cell_id | sample |
|---|---|---|---|
| 0 | 0 | 2305551000002100247 | VGN1a6 |
| 1 | 3 | 2305551000002100249 | VGN1a6 |
| 2 | 9 | 2305551000002100250 | VGN1a6 |
| 3 | 3 | 2305551000002100262 | VGN1a6 |
| 4 | 6 | 2305551000002100268 | VGN1a6 |
| ... | ... | ... | ... |
| 50726 | 4 | 2655507000138100169 | VGN1e9 |
| 50727 | 4 | 2655507000138100170 | VGN1e9 |
| 50728 | 0 | 2655507000138100176 | VGN1e9 |
| 50729 | 4 | 2655507000138100177 | VGN1e9 |
| 50730 | 0 | 2655507000138100178 | VGN1e9 |
50731 rows × 3 columns
Calculating total cells per cluster and percentage of total cells per cluster¶
cluster_info = adata_spatial.obs['clusters'].copy()
df = cluster_info.reset_index()
df[['cell_id', 'sample']] = df['index'].str.split('-', expand=True)
df.drop(columns=['index'], inplace=True)
cluster_sample_counts = df.groupby(['clusters', 'sample']).size().reset_index(name='cell_count')
sample_order = ['VGN1e1', 'VGN1b6', 'VGN1a6', 'VGN1c2']
# Filter the DataFrame to include only the specified samples
cluster_sample_counts_filtered = cluster_sample_counts[cluster_sample_counts['sample'].isin(sample_order)]
# Set the 'sample' column as a categorical type with the specified order
cluster_sample_counts_filtered['sample'] = pd.Categorical(cluster_sample_counts_filtered['sample'], categories=sample_order, ordered=True)
# Sort the DataFrame by the 'sample' column based on the categorical order
cluster_sample_counts_ordered = cluster_sample_counts_filtered.sort_values('sample')
# Calculate the total number of cells per sample
total_cells_per_sample = cluster_sample_counts_ordered.groupby('sample')['cell_count'].sum().reset_index(name='total_cells')
# Merge the total cells with the original filtered DataFrame
cluster_sample_counts_w_total = pd.merge(cluster_sample_counts_ordered, total_cells_per_sample, on='sample')
# Calculate the percentage of cells per cluster in each sample
cluster_sample_counts_w_total['percentage'] = (cluster_sample_counts_w_total['cell_count'] / cluster_sample_counts_w_total['total_cells']) * 100
cluster_sample_counts_w_total.to_csv("leidenclustering/percentages_Fig2f.csv", index=False)
cluster_sample_counts_w_total
| | clusters | sample | cell_count | total_cells | percentage |
|---|---|---|---|---|---|
| 0 | 8 | VGN1e1 | 6 | 2209 | 0.271616 |
| 1 | 16 | VGN1e1 | 0 | 2209 | 0.000000 |
| 2 | 15 | VGN1e1 | 1 | 2209 | 0.045269 |
| 3 | 14 | VGN1e1 | 0 | 2209 | 0.000000 |
| 4 | 13 | VGN1e1 | 116 | 2209 | 5.251245 |
| ... | ... | ... | ... | ... | ... |
| 67 | 10 | VGN1c2 | 871 | 15185 | 5.735924 |
| 68 | 3 | VGN1c2 | 349 | 15185 | 2.298321 |
| 69 | 13 | VGN1c2 | 399 | 15185 | 2.627593 |
| 70 | 9 | VGN1c2 | 598 | 15185 | 3.938097 |
| 71 | 11 | VGN1c2 | 362 | 15185 | 2.383932 |
72 rows × 5 columns
plt.style.use('default')
# Define the custom cluster order
cluster_order = ['17', '16', '14', '15', '10', '7', '8', '13', '11', '9', '5', '6', '2', '12', '1', '4', '0', '3']
# Pivot the DataFrame so that clusters are the index, samples are the columns, and percentage of cells are the values
pivot_df_percentage = cluster_sample_counts_w_total.pivot_table(index='clusters', columns='sample', values='percentage', fill_value=0)
# Reorder the columns according to the custom cluster order
pivot_df_percentage = pivot_df_percentage.reindex(cluster_order)
# Sample-to-stage mapping
sample_to_stage = {
'VGN1e1': 'W2.5',
'VGN1b6': 'W3.25',
'VGN1a6': 'W4',
'VGN1c2': 'W5'
}
# Map the columns (sample IDs) to their corresponding stages
pivot_df_percentage.rename(columns=sample_to_stage, inplace=True)
# Set font sizes for the entire plot
plt.rc('font', size=14) # Global font size
plt.rc('axes', titlesize=20) # Title font size
plt.rc('axes', labelsize=20) # Axis labels font size
plt.rc('xtick', labelsize=20) # X-axis tick labels font size
plt.rc('ytick', labelsize=20) # Y-axis tick labels font size
plt.rc('legend', fontsize=25) # Legend font size
# Create the bar plot
ax = pivot_df_percentage.plot(kind='barh', stacked=True, figsize=(10, 8), color=['#410052', '#34618D', '#7CD250', '#FDE725'], edgecolor='none')
# Set labels and title
ax.set_xlabel('Percentage of Cells', fontsize=16)
ax.set_ylabel('Cluster Number', fontsize=16)
ax.set_title('Percentage of Cells per Cluster by Sample', fontsize=18)
# Customize the spines (borders)
ax.spines['top'].set_color('white') # Set top border to white (invisible)
ax.spines['right'].set_color('white') # Set right border to white (invisible)
ax.spines['bottom'].set_color('black') # Set x-axis spine (bottom) to black
ax.spines['left'].set_color('black') # Set y-axis spine (left) to black
ax.spines['bottom'].set_linewidth(1.5) # Optionally, increase the thickness of x-axis
ax.spines['left'].set_linewidth(1.5) # Optionally, increase the thickness of y-axis
# Add legend outside the plot
plt.legend(title='Developmental Stage', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.savefig("leidenclustering/Percentage_Cells_PerCluster_bystage.svg", dpi=300, bbox_inches='tight', format='svg', transparent=True)
plt.show()
Gene enrichment analysis¶
Logistic Regression Description: This method models the probability of a gene being expressed in one cluster versus the others. A higher score means the gene is more strongly associated with the cluster in question; conversely, lower or negative scores suggest the gene is less expressed in that cluster relative to the others.
See publication: A discriminative learning approach to differential expression analysis for single-cell RNA-seq
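Conceptually, method='logreg' fits a logistic regression with the clusters as classes and the genes as features, and reports the fitted coefficients as per-cluster scores. A rough sketch of that idea with scikit-learn (not the exact scanpy internals; the parameter choices here are illustrative):
# Sketch only: fit cluster membership on the expression matrix and read the
# coefficients as enrichment scores (one row of coefficients per cluster).
from sklearn.linear_model import LogisticRegression
X = adata_spatial.X  # cells x genes (Scanorama-corrected values here)
y = adata_spatial.obs["clusters"].astype(str).values
logreg = LogisticRegression(max_iter=1000)  # illustrative settings
logreg.fit(X, y)
# coef_ has shape (n_clusters, n_genes); larger values = gene more predictive of that cluster
coef_df = pd.DataFrame(logreg.coef_, index=logreg.classes_, columns=adata_spatial.var_names)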
sc.tl.rank_genes_groups(adata_spatial, groupby='clusters', method='logreg')
# Inspect the keys in the rank_genes_groups result
result = adata_spatial.uns['rank_genes_groups']
print(result.keys())
dict_keys(['params', 'names', 'scores'])
I want these values exported as a dataframe containing Gene, LogReg Score, and normalised counts (by sample)¶
(Supplementary Table 10)
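Note: recent scanpy versions also provide sc.get.rank_genes_groups_df, which exports the rank_genes_groups result as a tidy dataframe directly; with method='logreg' only the 'names' and 'scores' columns are populated. A hedged shortcut (exact behaviour depends on the scanpy version), kept here for reference alongside the manual extraction below:
# Optional shortcut: one dataframe with gene names and LogReg scores for all clusters
logreg_df = sc.get.rank_genes_groups_df(adata_spatial, group=None)
logreg_df.head()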
#loading my adata objects again
adata_filenames = {
"adata_VGN1a6": "qc/adata_VGN1a6_clean.h5ad",
"adata_VGN1a4": "qc/adata_VGN1a4_clean.h5ad",
"adata_VGN1b6": "qc/adata_VGN1b6_clean.h5ad",
"adata_VGN1b8": "qc/adata_VGN1b8_clean.h5ad",
"adata_VGN1e1": "qc/adata_VGN1e1_clean.h5ad",
"adata_VGN1e9": "qc/adata_VGN1e9_clean.h5ad",
"adata_VGN1c2": "qc/adata_VGN1c2_clean.h5ad",
"adata_VGN1c3": "qc/adata_VGN1c3_clean.h5ad"
}
# Initialize a dictionary to store the loaded AnnData objects
loaded_adata_objects = {}
# Load each file and store it in the dictionary
for name, filename in adata_filenames.items():
loaded_adata_objects[name] = sc.read_h5ad(filename)
print(f"Loaded {name} from {filename}")
# Access the loaded AnnData objects
adata_VGN1a6 = loaded_adata_objects["adata_VGN1a6"]
adata_VGN1a4 = loaded_adata_objects["adata_VGN1a4"]
adata_VGN1b6 = loaded_adata_objects["adata_VGN1b6"]
adata_VGN1b8 = loaded_adata_objects["adata_VGN1b8"]
adata_VGN1e1 = loaded_adata_objects["adata_VGN1e1"]
adata_VGN1e9 = loaded_adata_objects["adata_VGN1e9"]
adata_VGN1c2 = loaded_adata_objects["adata_VGN1c2"]
adata_VGN1c3 = loaded_adata_objects["adata_VGN1c3"]
Loaded adata_VGN1a6 from qc/adata_VGN1a6_clean.h5ad Loaded adata_VGN1a4 from qc/adata_VGN1a4_clean.h5ad Loaded adata_VGN1b6 from qc/adata_VGN1b6_clean.h5ad Loaded adata_VGN1b8 from qc/adata_VGN1b8_clean.h5ad Loaded adata_VGN1e1 from qc/adata_VGN1e1_clean.h5ad Loaded adata_VGN1e9 from qc/adata_VGN1e9_clean.h5ad Loaded adata_VGN1c2 from qc/adata_VGN1c2_clean.h5ad Loaded adata_VGN1c3 from qc/adata_VGN1c3_clean.h5ad
# List of adata objects and their corresponding output file paths
adatas = [
(adata_VGN1a6, "VGN1a6"),
(adata_VGN1a4, "VGN1a4"),
(adata_VGN1b6, "VGN1b6"),
(adata_VGN1b8, "VGN1b8"),
(adata_VGN1c2, "VGN1c2"),
(adata_VGN1c3, "VGN1c3"),
(adata_VGN1e1, "VGN1e1"),
(adata_VGN1e9, "VGN1e9")
]
# making a dataframe with cluster assignments
clusters_df = pd.DataFrame(adata_spatial.obs['clusters'])
clusters_df.index = clusters_df.index.str.split('-').str[0]
# Dictionary to store DataFrames
dataframes = {}
# extracting normalised expression counts
def process_adata(adata, clusters_df, df_name):
# Convert sparse matrix to dense if necessary
if isinstance(adata.X, scipy.sparse.spmatrix):
X_dense = adata.X.toarray()
else:
X_dense = adata.X
# Create gene expression matrix dataframe
cell_names = adata.obs_names
gene_names = adata.var_names
gene_expression_matrix = pd.DataFrame(data=X_dense, index=cell_names, columns=gene_names)
# Merge df_clusters with df_counts on index (cell ID)
merged_df = pd.merge(gene_expression_matrix, clusters_df, left_index=True, right_index=True)
# Group by clusters and calculate the average counts for each transcript ID
cluster_counts = merged_df.groupby('clusters').mean()
cluster_counts = cluster_counts.transpose()
# Set clusters with very few cells in this sample to NaN (poorly represented cell groups)
if df_name in ['VGN1e1', 'VGN1e9']:
cluster_counts.loc[:, ['7', '5', '15', '8']] = np.nan
elif df_name in ['VGN1b6', 'VGN1b8']:
cluster_counts.loc[:, ['7', '15']] = np.nan
elif df_name in ['VGN1a4', 'VGN1a6']:
cluster_counts.loc[:, ['17']] = np.nan
# Store the DataFrame in the dictionary
dataframes[df_name] = cluster_counts
# Process each adata object and store the results in dataframes dictionary
for adata, df_name in adatas:
process_adata(adata, clusters_df, df_name)
# Access the DataFrames using the keys
VGN1a6 = dataframes['VGN1a6']
VGN1a4 = dataframes['VGN1a4']
VGN1b6 = dataframes['VGN1b6']
VGN1b8 = dataframes['VGN1b8']
VGN1c2 = dataframes['VGN1c2']
VGN1c3 = dataframes['VGN1c3']
VGN1e1 = dataframes['VGN1e1']
VGN1e9 = dataframes['VGN1e9']
# save results of rank gene groups
result = adata_spatial.uns['rank_genes_groups']
# Get the cluster names
cluster_names = result['names'].dtype.names
# Initialize lists to store data
genes = []
scores = []
clusters = []
# Iterate over each cluster to populate lists
for cluster in cluster_names:
cluster_genes = result['names'][cluster] # Get gene names for the cluster
cluster_scores = result['scores'][cluster] # Get scores for the cluster
# Append each gene and its corresponding score to the lists
for gene, score in zip(cluster_genes, cluster_scores):
genes.append(gene)
scores.append(score)
clusters.append(cluster) # Add the cluster name
# Create a DataFrame
markergene_df = pd.DataFrame({
'gene': genes,
'score': scores,
'cluster': clusters
})
# Restructure the dataframe using pivot
restructured_df = markergene_df.pivot(
index='gene', # Use 'gene' as the index
columns='cluster', # Use 'cluster' to create new columns
values='score' # Populate the new columns with 'score' values
)
# Rename columns to include '_EnrichmentScore' suffix
restructured_df.columns = [f"{cluster}_EnrichmentScore" for cluster in restructured_df.columns]
# Reset index to make 'gene' a regular column
restructured_df.reset_index(inplace=True)
# Sort enrichment score columns numerically
enrichment_cols = sorted(
[col for col in restructured_df.columns if '_EnrichmentScore' in col], # Filter for enrichment score columns
key=lambda x: int(x.split('_')[0]) # Extract numeric part and sort
)
# Reorder the dataframe columns
restructured_df = restructured_df[['gene'] + enrichment_cols]
restructured_df
| | gene | 0_EnrichmentScore | 1_EnrichmentScore | 2_EnrichmentScore | 3_EnrichmentScore | 4_EnrichmentScore | 5_EnrichmentScore | 6_EnrichmentScore | 7_EnrichmentScore | 8_EnrichmentScore | 9_EnrichmentScore | 10_EnrichmentScore | 11_EnrichmentScore | 12_EnrichmentScore | 13_EnrichmentScore | 14_EnrichmentScore | 15_EnrichmentScore | 16_EnrichmentScore | 17_EnrichmentScore |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | TraesCS1A02G052000 | -1.899248 | -0.265776 | -1.618058 | -0.018310 | 0.812011 | -1.089552 | -0.354154 | -1.914170 | -0.421712 | -0.826297 | -1.008212 | -0.002280 | 1.397553 | -0.645299 | 1.715561 | 2.230191 | 0.119648 | 3.788102 |
| 1 | TraesCS1A02G077800 | -3.895693 | -0.690553 | -2.864961 | -1.759157 | -0.803881 | -1.694246 | 5.860512 | 3.174781 | -1.542214 | 0.982594 | 0.605621 | -1.146341 | -1.382980 | 5.235276 | -0.493602 | -0.419315 | 0.979379 | -0.145220 |
| 2 | TraesCS1A02G154900 | -0.840187 | -0.257462 | -0.157702 | 2.576114 | 0.632968 | -0.774375 | -0.272984 | -0.132877 | -0.479977 | 1.849547 | -0.442009 | 0.202528 | -0.909859 | -0.269406 | -0.000572 | -0.330262 | -0.221848 | -0.171638 |
| 3 | TraesCS1A02G156100 | -2.038370 | -0.190227 | -0.412845 | -0.868299 | 0.177898 | 1.730197 | 0.238617 | 0.860592 | -0.565050 | -0.797859 | 0.608742 | 2.955381 | -0.748560 | -1.093605 | 1.253467 | -0.466003 | -0.414292 | -0.229786 |
| 4 | TraesCS1A02G199600 | 2.371270 | -0.677488 | 2.211071 | -2.370305 | -1.328793 | 1.440920 | -0.629660 | 0.245045 | -0.228128 | -0.972547 | -0.781369 | 2.029766 | 0.734890 | 1.045224 | -0.880091 | -1.509792 | -0.723550 | 0.023538 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 195 | TraesCS7D02G342300 | -1.323906 | -0.532578 | -1.139863 | -0.538103 | 3.260145 | -1.136548 | -0.870831 | 0.567301 | -0.666863 | -0.320511 | -0.409740 | 2.506989 | -1.027976 | -0.739799 | 1.096642 | 1.756481 | -0.010124 | -0.470718 |
| 196 | TraesCS7D02G379200 | -0.698107 | -0.069254 | -0.706524 | -0.848616 | -0.062484 | 2.485972 | -0.639519 | -0.449304 | 0.007441 | -0.439396 | 0.193667 | 2.389086 | 0.058641 | -0.327640 | 0.217816 | -0.309713 | -0.305536 | -0.496528 |
| 197 | TraesCS7D02G388600 | -0.359088 | 0.125317 | -0.269723 | 1.336437 | 0.737247 | -0.095077 | -0.761950 | -0.204690 | 0.696411 | 0.055025 | -0.285617 | -0.173296 | -0.391878 | 0.197854 | -0.350791 | -0.328775 | 0.035662 | 0.036933 |
| 198 | TraesCS7D02G521200 | 0.681640 | -0.897675 | -1.102229 | -0.981968 | -1.312759 | -0.640440 | 2.357756 | 0.062453 | 0.057815 | -0.553182 | -0.377786 | -0.071443 | 0.849560 | 0.140697 | -0.332130 | 0.104542 | 0.190743 | 1.824404 |
| 199 | TraesCSU02G093200 | 0.042112 | -0.337133 | 0.148754 | -0.008615 | 0.248737 | 0.388551 | -0.155734 | -0.681636 | 0.523509 | 0.307238 | -0.293516 | -0.272808 | -0.033067 | -0.092026 | -0.127859 | 0.111299 | 0.130460 | 0.101733 |
200 rows × 19 columns
# Ensure 'gene' is the index in the restructured dataframe for alignment
restructured_df.set_index('gene', inplace=True)
# Initialize the base dataframe with enrichment scores
enrichmentscore_normexp_combined_df = restructured_df.copy()
# Iterate over the sample-specific dataframes in the dictionary
for sample, sample_df in dataframes.items():
# Reset index to bring 'gene' into a column
sample_df = sample_df.reset_index()
# Align the sample dataframe with the restructured dataframe on 'gene'
sample_df = sample_df.set_index('index') # Use 'index' (former gene names) as the new index
sample_df = sample_df.rename_axis('gene') # Rename the index to 'gene'
# Add sample-specific prefixes to columns
sample_df = sample_df.rename(columns=lambda col: f"{sample}_{col}")
# Merge the sample data with the combined dataframe
enrichmentscore_normexp_combined_df = enrichmentscore_normexp_combined_df.join(sample_df, how='left')
# Reset the index to make 'gene' a regular column
enrichmentscore_normexp_combined_df.reset_index(inplace=True)
# Desired sample order
sample_order = ['VGN1e1', 'VGN1e9', 'VGN1b6', 'VGN1b8', 'VGN1a6', 'VGN1a4', 'VGN1c2', 'VGN1c3']
# Get the list of expression columns to order
expression_columns = [col for col in enrichmentscore_normexp_combined_df.columns if any(sample in col for sample in sample_order)]
# Sort the expression columns by the desired sample order
ordered_expression_columns = sorted(expression_columns, key=lambda col: sample_order.index(col.split('_')[0]))
# Keep non-expression columns (e.g., 'gene', enrichment scores) in the beginning
non_expression_columns = [col for col in enrichmentscore_normexp_combined_df.columns if col not in expression_columns]
# Reorder the dataframe columns
enrichmentscore_normexp_combined_df = enrichmentscore_normexp_combined_df[non_expression_columns + ordered_expression_columns]
# Save or inspect the final dataframe
enrichmentscore_normexp_combined_df
| | gene | 0_EnrichmentScore | 1_EnrichmentScore | 2_EnrichmentScore | 3_EnrichmentScore | 4_EnrichmentScore | 5_EnrichmentScore | 6_EnrichmentScore | 7_EnrichmentScore | 8_EnrichmentScore | ... | VGN1c3_8 | VGN1c3_9 | VGN1c3_10 | VGN1c3_11 | VGN1c3_12 | VGN1c3_13 | VGN1c3_14 | VGN1c3_15 | VGN1c3_16 | VGN1c3_17 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | TraesCS1A02G052000 | -1.899248 | -0.265776 | -1.618058 | -0.018310 | 0.812011 | -1.089552 | -0.354154 | -1.914170 | -0.421712 | ... | 0.016740 | 0.027439 | 0.045747 | 0.047847 | 0.017383 | 0.021586 | 0.353534 | 0.229726 | 0.063127 | 0.372287 |
| 1 | TraesCS1A02G077800 | -3.895693 | -0.690553 | -2.864961 | -1.759157 | -0.803881 | -1.694246 | 5.860512 | 3.174781 | -1.542214 | ... | 0.141597 | 0.403543 | 0.189756 | 0.024777 | 0.149514 | 0.996244 | 0.011831 | 0.059267 | 0.131254 | 0.064863 |
| 2 | TraesCS1A02G154900 | -0.840187 | -0.257462 | -0.157702 | 2.576114 | 0.632968 | -0.774375 | -0.272984 | -0.132877 | -0.479977 | ... | 0.005315 | 0.051778 | 0.005273 | 0.015205 | 0.055854 | 0.017627 | 0.015629 | 0.004341 | 0.013033 | 0.009766 |
| 3 | TraesCS1A02G156100 | -2.038370 | -0.190227 | -0.412845 | -0.868299 | 0.177898 | 1.730197 | 0.238617 | 0.860592 | -0.565050 | ... | 0.058267 | 0.040335 | 0.102397 | 0.302900 | 0.172868 | 0.087936 | 0.159085 | 0.060469 | 0.041710 | 0.053610 |
| 4 | TraesCS1A02G199600 | 2.371270 | -0.677488 | 2.211071 | -2.370305 | -1.328793 | 1.440920 | -0.629660 | 0.245045 | -0.228128 | ... | 0.090973 | 0.028937 | 0.025283 | 0.181941 | 0.220940 | 0.138797 | 0.030069 | 0.029664 | 0.031233 | 0.050177 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 195 | TraesCS7D02G342300 | -1.323906 | -0.532578 | -1.139863 | -0.538103 | 3.260145 | -1.136548 | -0.870831 | 0.567301 | -0.666863 | ... | 0.009702 | 0.002303 | 0.090308 | 0.229741 | 0.000000 | 0.010370 | 0.180497 | 0.195012 | 0.085941 | 0.124137 |
| 196 | TraesCS7D02G379200 | -0.698107 | -0.069254 | -0.706524 | -0.848616 | -0.062484 | 2.485972 | -0.639519 | -0.449304 | 0.007441 | ... | 0.026119 | 0.011584 | 0.051343 | 0.189548 | 0.000000 | 0.023589 | 0.042417 | 0.010876 | 0.043627 | 0.009813 |
| 197 | TraesCS7D02G388600 | -0.359088 | 0.125317 | -0.269723 | 1.336437 | 0.737247 | -0.095077 | -0.761950 | -0.204690 | 0.696411 | ... | 0.084736 | 0.091440 | 0.049619 | 0.095866 | 0.133632 | 0.061324 | 0.048247 | 0.062308 | 0.032471 | 0.053517 |
| 198 | TraesCS7D02G521200 | 0.681640 | -0.897675 | -1.102229 | -0.981968 | -1.312759 | -0.640440 | 2.357756 | 0.062453 | 0.057815 | ... | 0.032484 | 0.015165 | 0.095812 | 0.096225 | 0.020511 | 0.034811 | 0.162881 | 0.227820 | 0.138573 | 0.389201 |
| 199 | TraesCSU02G093200 | 0.042112 | -0.337133 | 0.148754 | -0.008615 | 0.248737 | 0.388551 | -0.155734 | -0.681636 | 0.523509 | ... | 0.051506 | 0.057186 | 0.048762 | 0.034607 | 0.000000 | 0.078886 | 0.035634 | 0.060346 | 0.055439 | 0.055034 |
200 rows × 163 columns
enrichmentscore_normexp_combined_df2 = enrichmentscore_normexp_combined_df.copy()
# Define the mapping of replacements
replacement_map = {
'VGN1e1': 'LDR_WT_cluster',
'VGN1e9': 'LDR_VRT-A2b_cluster',
'VGN1b6': 'LP_WT_cluster',
'VGN1b8': 'LP_VRT-A2b_cluster',
'VGN1a6': 'TS_WT_cluster',
'VGN1a4': 'TS_VRT-A2b_cluster',
'VGN1c2': 'CER_WT_cluster',
'VGN1c3': 'CER_VRT-A2b_cluster'
}
# Rename columns using the mapping
def rename_columns(col_name):
for old_value, new_value in replacement_map.items():
if col_name.startswith(old_value):
return col_name.replace(old_value, new_value)
return col_name
enrichmentscore_normexp_combined_df2.rename(columns=rename_columns, inplace=True)
enrichmentscore_normexp_combined_df2
enrichmentscore_normexp_combined_df2.to_csv("enrichmentanalysis/LogRegScore_NormExpAvg_allsamples.csv", index=False)
Now I want to select the top marker genes for each cluster and put these in a table¶
First I need to choose a 'cut-off' for calling genes enriched, so I will look at the distribution of scores
Then I will set the cut-off at the mean + 2 standard deviations (SD) and consider genes scoring above it enriched
# Calculate mean and standard deviation of the 'score' column
mean_score = markergene_df['score'].mean()
std_score = markergene_df['score'].std()
# Calculate the +2 SD cutoff
cutoff = mean_score + 2 * std_score
print(f"Mean: {mean_score}, Standard Deviation: {std_score}")
print(f"Cutoff for significantly enriched scores: {cutoff}")
# Filter the DataFrame for significant scores
significant_scores_df = markergene_df[markergene_df['score'] > cutoff]
# Display significant scores
print(significant_scores_df)
plt.figure(figsize=(10, 6))
plt.hist(markergene_df['score'], bins=50, color='skyblue', edgecolor='black', alpha=0.7)
plt.axvline(cutoff, color='red', linestyle='--', linewidth=2, label=f'Cutoff (+2 SD = {cutoff:.2f})')
plt.title('Frequency Distribution of Scores', fontsize=16)
plt.xlabel('Score', fontsize=14)
plt.ylabel('Frequency', fontsize=14)
plt.legend(fontsize=12)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()
Mean: 0.0, Standard Deviation: 1.8383444547653198
Cutoff for significantly enriched scores: 3.6766889095306396
gene score cluster
0 TraesCS4A02G256700 9.094888 0
1 TraesCS4B02G064000 6.216168 0
2 TraesCS3D02G284200 4.588726 0
200 TraesCS4D02G296400 13.337225 1
201 TraesCS6D02G220400 8.891061 1
... ... ... ...
3403 TraesCS7D02G261600 6.040273 17
3404 TraesCS1A02G264300 5.881382 17
3405 TraesCS6D02G220400 4.888692 17
3406 TraesCS2B02G464200 3.843694 17
3407 TraesCS1A02G052000 3.788102 17
[114 rows x 3 columns]
So across all clusters combined, the +2 SD cut-off is 3.68. I will also calculate it for each cluster individually to see how variable it is between clusters.
# Group the dataframe by 'cluster' and process each group
cluster_results = {} # To store results for each cluster
for cluster, group in markergene_df.groupby('cluster'):
# Calculate mean and standard deviation for the current cluster
mean_score = group['score'].mean()
std_score = group['score'].std()
# Calculate the +2 SD cutoff
cutoff = mean_score + 2 * std_score
# Filter significant scores for the current cluster
significant_scores = group[group['score'] > cutoff]
# Store results
cluster_results[cluster] = {
'mean': mean_score,
'std': std_score,
'cutoff': cutoff,
'significant_scores': significant_scores
}
# Print summary for this cluster
print(f"Cluster: {cluster}")
print(f" Mean: {mean_score:.2f}")
print(f" Standard Deviation: {std_score:.2f}")
print(f" Cutoff (+2 SD): {cutoff:.2f}")
print(f" Significant Scores Count: {len(significant_scores)}")
print()
# Combine all significant scores into a single DataFrame (optional)
all_significant_scores = pd.concat([res['significant_scores'] for res in cluster_results.values()])
Cluster: 0 Mean: -0.32 Standard Deviation: 1.92 Cutoff (+2 SD): 3.51 Significant Scores Count: 3 Cluster: 1 Mean: -0.11 Standard Deviation: 2.04 Cutoff (+2 SD): 3.98 Significant Scores Count: 6 Cluster: 10 Mean: 0.03 Standard Deviation: 1.54 Cutoff (+2 SD): 3.12 Significant Scores Count: 9 Cluster: 11 Mean: -0.13 Standard Deviation: 1.80 Cutoff (+2 SD): 3.46 Significant Scores Count: 5 Cluster: 12 Mean: 0.12 Standard Deviation: 1.92 Cutoff (+2 SD): 3.96 Significant Scores Count: 6 Cluster: 13 Mean: 0.14 Standard Deviation: 1.71 Cutoff (+2 SD): 3.55 Significant Scores Count: 6 Cluster: 14 Mean: 0.08 Standard Deviation: 1.56 Cutoff (+2 SD): 3.20 Significant Scores Count: 10 Cluster: 15 Mean: 0.11 Standard Deviation: 1.92 Cutoff (+2 SD): 3.96 Significant Scores Count: 6 Cluster: 16 Mean: 0.20 Standard Deviation: 1.76 Cutoff (+2 SD): 3.72 Significant Scores Count: 7 Cluster: 17 Mean: 0.11 Standard Deviation: 1.54 Cutoff (+2 SD): 3.20 Significant Scores Count: 10 Cluster: 2 Mean: -0.04 Standard Deviation: 1.85 Cutoff (+2 SD): 3.66 Significant Scores Count: 5 Cluster: 3 Mean: -0.22 Standard Deviation: 1.98 Cutoff (+2 SD): 3.74 Significant Scores Count: 8 Cluster: 4 Mean: -0.11 Standard Deviation: 2.06 Cutoff (+2 SD): 4.00 Significant Scores Count: 7 Cluster: 5 Mean: -0.05 Standard Deviation: 1.87 Cutoff (+2 SD): 3.70 Significant Scores Count: 4 Cluster: 6 Mean: -0.05 Standard Deviation: 1.87 Cutoff (+2 SD): 3.69 Significant Scores Count: 8 Cluster: 7 Mean: 0.17 Standard Deviation: 1.95 Cutoff (+2 SD): 4.07 Significant Scores Count: 5 Cluster: 8 Mean: 0.12 Standard Deviation: 1.81 Cutoff (+2 SD): 3.74 Significant Scores Count: 3 Cluster: 9 Mean: -0.03 Standard Deviation: 1.84 Cutoff (+2 SD): 3.65 Significant Scores Count: 8
The cut-offs are not very variable; they range from 3.12 to 4.07. I will apply the per-cluster cut-offs to select my 'top genes' and see how many markers this gives for each cluster.
# Create an empty list to collect filtered data for each cluster
filtered_data = []
# Dictionary to store row counts for each cluster
cluster_row_counts = {}
# Loop through each cluster group
for cluster, group in markergene_df.groupby('cluster'):
# Calculate mean and standard deviation for the current cluster
mean_score = group['score'].mean()
std_score = group['score'].std()
# Calculate the +2 SD cutoff
cutoff = mean_score + 2 * std_score
# Filter the group for scores above the cutoff
filtered_group = group[group['score'] > cutoff]
# Append the filtered group to the list
filtered_data.append(filtered_group)
# Store the number of rows in the filtered group
cluster_row_counts[cluster] = len(filtered_group)
# Combine all filtered groups into a single DataFrame
filtered_markergene_df = pd.concat(filtered_data)
# Reset index for clarity (optional)
filtered_markergene_df.reset_index(drop=True, inplace=True)
# Print the row counts for each cluster
print("Number of rows per cluster in the filtered DataFrame:")
for cluster, count in cluster_row_counts.items():
print(f"Cluster {cluster}: {count} genes")
# Display the total number of rows in the filtered DataFrame
print(f"\nTotalfiltered marker genes: {len(filtered_markergene_df)}")
# Display the filtered DataFrame (optional)
print(filtered_markergene_df)
Number of rows per cluster in the filtered DataFrame:
Cluster 0: 3 genes
Cluster 1: 6 genes
Cluster 10: 9 genes
Cluster 11: 5 genes
Cluster 12: 6 genes
Cluster 13: 6 genes
Cluster 14: 10 genes
Cluster 15: 6 genes
Cluster 16: 7 genes
Cluster 17: 10 genes
Cluster 2: 5 genes
Cluster 3: 8 genes
Cluster 4: 7 genes
Cluster 5: 4 genes
Cluster 6: 8 genes
Cluster 7: 5 genes
Cluster 8: 3 genes
Cluster 9: 8 genes
Total filtered marker genes: 116
gene score cluster
0 TraesCS4A02G256700 9.094888 0
1 TraesCS4B02G064000 6.216168 0
2 TraesCS3D02G284200 4.588726 0
3 TraesCS4D02G296400 13.337225 1
4 TraesCS6D02G220400 8.891061 1
.. ... ... ...
111 TraesCS7B02G413900 5.830931 9
112 TraesCS7A02G175200 4.953066 9
113 TraesCS5A02G356100 4.799537 9
114 TraesCS2B02G399800 4.534127 9
115 TraesCS5A02G473800 3.859297 9
[116 rows x 3 columns]
Now I want to annotate these markers with their names¶
This will be used for Supplementary Table H, with additional annotations added by hand (including literature references and label names)
annotation = pd.read_csv('annotated_names_12_24.csv').dropna()
annotation = annotation.rename(columns={'gene_id': 'gene'})
annotation_subset = annotation[['gene', 'annotated_name']]
# Merge filtered_markergene_df with the annotation_subset on the 'gene' column
filtered_markergene_df_annotated = filtered_markergene_df.merge(annotation_subset, on='gene', how = 'left')
filtered_markergene_df_annotated['annotated_name'] = filtered_markergene_df_annotated['annotated_name'].fillna('')
filtered_markergene_df_annotated
formatted_markergene_df = (
filtered_markergene_df_annotated.drop(columns='score')
.groupby('cluster')
.agg({
'gene': lambda x: ', '.join(filter(None, x)), # Skip empty strings
'annotated_name': lambda x: ', '.join(filter(None, x)) # Skip empty strings
})
.reset_index()
)
# Display the formatted DataFrame
formatted_markergene_df.to_csv("enrichmentanalysis/topenrichedgenes_domainnames.csv", index=False)
formatted_markergene_df
| | cluster | gene | annotated_name |
|---|---|---|---|
| 0 | 0 | TraesCS4A02G256700, TraesCS4B02G064000, TraesC... | TraesCS4A02G256700_TaKNOX5, TraesCS4B02G064000... |
| 1 | 1 | TraesCS4D02G296400, TraesCS6D02G220400, TraesC... | TraesCS4D02G296400, TraesCS6D02G220400_TaYABBY... |
| 2 | 10 | TraesCS7A02G383800, TraesCS7D02G261600, TraesC... | TraesCS7A02G383800_AP3_OsMADS16, TraesCS7D02G2... |
| 3 | 11 | TraesCS7D02G246100, TraesCS3D02G284200, TraesC... | TraesCS7D02G246100_OsCUC3, TraesCS3D02G284200_... |
| 4 | 12 | TraesCS6A02G287300, TraesCS1A02G418200, TraesC... | TraesCS6A02G287300_OsLEC1, TraesCS1A02G418200_... |
| 5 | 13 | TraesCS6D02G220400, TraesCS1D02G162600, TraesC... | TraesCS6D02G220400_TaYABBY7D, TraesCS1D02G1626... |
| 6 | 14 | TraesCS4D02G296400, TraesCS1D02G197300, TraesC... | TraesCS4D02G296400, TraesCS1D02G197300_OsROC3t... |
| 7 | 15 | TraesCS6A02G259000, TraesCS2B02G464200, TraesC... | TraesCS6A02G259000_AGL6_OsMADS6, TraesCS2B02G4... |
| 8 | 16 | TraesCS6A02G259000, TraesCS1D02G127700, TraesC... | TraesCS6A02G259000_AGL6_OsMADS6, TraesCS1D02G1... |
| 9 | 17 | TraesCS6A02G259000, TraesCS7A02G383800, TraesC... | TraesCS6A02G259000_AGL6_OsMADS6, TraesCS7A02G3... |
| 10 | 2 | TraesCS6D02G220400, TraesCS2B02G403100, TraesC... | TraesCS6D02G220400_TaYABBY7D, TraesCS2B02G4031... |
| 11 | 3 | TraesCS1B02G042200, TraesCS4A02G256700, TraesC... | TraesCS1B02G042200_MT2, TraesCS4A02G256700_TaK... |
| 12 | 4 | TraesCS7A02G308400, TraesCS4D02G296400, TraesC... | TraesCS7A02G308400_OsROC7t, TraesCS4D02G296400... |
| 13 | 5 | TraesCS3D02G284200, TraesCS1B02G042200, TraesC... | TraesCS3D02G284200_OsMADS32, TraesCS1B02G04220... |
| 14 | 6 | TraesCS1B02G479300, TraesCS6A02G176400, TraesC... | TraesCS1B02G479300, TraesCS6A02G176400_SRZ1, T... |
| 15 | 7 | TraesCS6A02G259000, TraesCS4A02G256700, TraesC... | TraesCS6A02G259000_AGL6_OsMADS6, TraesCS4A02G2... |
| 16 | 8 | TraesCS4D02G245300, TraesCS2B02G403100, TraesC... | TraesCS4D02G245300_TaYABBY4D, TraesCS2B02G4031... |
| 17 | 9 | TraesCS4B02G064000, TraesCS6A02G313800, TraesC... | TraesCS4B02G064000_OsGIF3, TraesCS6A02G313800_... |
Creating high-quality UMAPs and spatial plots for publication¶
# Define custom colors for specific clusters
custom_cluster_colors = {
'0': '#a70e62',
'1': '#e10e3c',
'2': '#f47140',
'3': '#f0b932',
'4' : '#ffd95a',
'5': '#c2ed51',
'6': '#8ed73d',
'7': '#489c73',
'8': '#1a8d83',
'9': '#4915be',
'10': '#2357d5',
'11': '#fed2ff',
'12': '#6ecad5',
'13': '#bd08f9',
'14': '#886fc2',
'15': '#e170c0',
'16': '#ff33cc',
'17': '#e4a9e8'
}
default_color = 'white'
plt.rcParams['xtick.major.size'] = 10
plt.rcParams['ytick.major.size'] = 10
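If the same palette should be picked up automatically by sc.pl.umap and sc.pl.spatial, one option is to store it in adata_spatial.uns under the standard 'clusters_colors' slot, ordered by the cluster categories (a small sketch, assuming 'clusters' is a categorical column):
# Sketch: register the custom colours so scanpy plots reuse them for 'clusters'.
# scanpy looks up categorical colours in adata.uns['<obs key>_colors'],
# one hex colour per category, in category order.
cluster_categories = adata_spatial.obs["clusters"].cat.categories
adata_spatial.uns["clusters_colors"] = [
    custom_cluster_colors.get(cat, default_color) for cat in cluster_categories
]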
Mapping late double ridge WT¶
# Filter to create a new AnnData object with only the sample 'VGN1e1'
adata_VGN1e1 = adata_spatial[adata_spatial.obs['dataset'] == 'VGN1e1'].copy()
adata_VGN1e1.obs.index = [index.split('-')[0] for index in adata_VGN1e1.obs.index]
# Convert adata_VGN1e1.obs to a dataframe and reset the index to make the index a column
adata_VGN1e1_df = adata_VGN1e1.obs.reset_index()
# Rename the index column if necessary (for example, to 'cell_id')
adata_VGN1e1_df = adata_VGN1e1_df.rename(columns={'index': 'cell_id'})
# Load segmentation data
segmentation_df = gpd.read_parquet("cell_segmentation/VGN1e_region1_output/cellpose2_micron_space_VGN1e1.parquet")
#filter segmentation data to only include high quality, analysed cells
#list the cell ids we want to include
adata_VGN1e1_df['cell_id'] = adata_VGN1e1_df['cell_id'].astype(str).str.strip()
cell_ids = adata_VGN1e1_df['cell_id'].tolist()
cell_ids = [cell_id.strip() for cell_id in cell_ids]
# Convert EntityID in filtered_segmentation_df to strings to match with cell_ids
filtered_segmentation_df = segmentation_df.copy()
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
# Now filter based on matching values in cell_ids
filtered_segmentation_df = filtered_segmentation_df[filtered_segmentation_df['EntityID'].isin(cell_ids)]
# Include cluster information in the dataframe
adata_VGN1e1_df = adata_VGN1e1.obs[['clusters']].copy()
adata_VGN1e1_df.index = adata_VGN1e1_df.index.astype(str) # Ensure the index is a string if necessary
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
filtered_segmentation_df = filtered_segmentation_df.merge(adata_VGN1e1_df, left_on='EntityID', right_index=True)
filtered_segmentation_df
| | ID | EntityID | ZIndex | Geometry | ParentType | ParentID | Type | ZLevel | Name | clusters |
|---|---|---|---|---|---|---|---|---|---|---|
| 48334 | 48334 | 2343479300017100075 | 1 | MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... | None | None | cell | 3.0 | None | 6 |
| 48829 | 48829 | 2343479300017100075 | 6 | MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... | None | None | cell | 10.5 | None | 6 |
| 48664 | 48664 | 2343479300017100075 | 0 | MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... | None | None | cell | 1.5 | None | 6 |
| 48499 | 48499 | 2343479300017100075 | 5 | MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... | None | None | cell | 9.0 | None | 6 |
| 48004 | 48004 | 2343479300017100075 | 2 | MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... | None | None | cell | 4.5 | None | 6 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 28175 | 28175 | 2343479300098100014 | 2 | MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... | None | None | cell | 4.5 | None | 12 |
| 28195 | 28195 | 2343479300098100014 | 0 | MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... | None | None | cell | 1.5 | None | 12 |
| 28200 | 28200 | 2343479300098100014 | 6 | MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... | None | None | cell | 10.5 | None | 12 |
| 28185 | 28185 | 2343479300098100014 | 1 | MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... | None | None | cell | 3.0 | None | 12 |
| 28170 | 28170 | 2343479300098100014 | 3 | MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... | None | None | cell | 6.0 | None | 12 |
15463 rows × 10 columns
# I need to look into the region min and max values so I know where to set the axes of graph
# Variables to track min and max x, y
min_x, min_y = float('inf'), float('inf')
max_x, max_y = float('-inf'), float('-inf')
# Iterate through the geometries
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
if isinstance(shape, (Polygon, MultiPolygon)):
# Get the bounding box of the shape
bounds = shape.bounds # Returns (min_x, min_y, max_x, max_y)
min_x = min(min_x, bounds[0])
min_y = min(min_y, bounds[1])
max_x = max(max_x, bounds[2])
max_y = max(max_y, bounds[3])
# Round to the nearest 1's place
min_x, min_y, max_x, max_y = round(min_x), round(min_y), round(max_x), round(max_y)
# Print the rounded min and max values for x and y
print(f"Min X: {min_x}, Max X: {max_x}")
print(f"Min Y: {min_y}, Max Y: {max_y}")
Min X: 3373, Max X: 4451 Min Y: 6363, Max Y: 7851
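The same bounding box can also be read off in one call with geopandas' total_bounds, since the 'Geometry' column holds shapely geometries (a sketch; it assumes the merged table can be wrapped back into a GeoDataFrame):
# Sketch: equivalent bounding box via geopandas instead of the manual loop.
# total_bounds returns (min_x, min_y, max_x, max_y) across all geometries.
seg_gdf = gpd.GeoDataFrame(filtered_segmentation_df, geometry="Geometry")
min_x, min_y, max_x, max_y = [round(v) for v in seg_gdf.total_bounds]
print(f"Min X: {min_x}, Max X: {max_x}")
print(f"Min Y: {min_y}, Max Y: {max_y}")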
fig, ax = plt.subplots()
patches_list = []
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
cluster = str(row['clusters']) # Convert cluster to string to match custom colors dict keys
# Assign color based on the custom mapping, with a default fallback
color = custom_cluster_colors.get(cluster, default_color)
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.1)
patches_list.append(patch)
elif isinstance(shape, Polygon):
patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.1)
patches_list.append(patch)
# Set axis limits
ax.set_xlim([3200, 4500])
ax.set_ylim([6400, 7800])
ax.set_aspect('equal')
# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
plt.show()
import numpy as np
from shapely.geometry import Polygon, MultiPolygon
from shapely.affinity import affine_transform
from matplotlib.lines import Line2D
# Ensure the original geometries are preserved
if 'Original_Geometry' not in filtered_segmentation_df.columns:
filtered_segmentation_df['Original_Geometry'] = filtered_segmentation_df['Geometry']
# Define rotation angle in degrees
rotation_angle = 20
angle_rad = np.radians(rotation_angle)
# Calculate the rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)
# Get the centroid of the entire sample as the rotation origin
centroid_x = (min_x + max_x) / 2 # Use the previously computed min_x, max_x
centroid_y = (min_y + max_y) / 2 # Use the previously computed min_y, max_y
# Rotation matrix for Shapely's affine_transform (a, b, d, e, xoff, yoff)
rotation_matrix = [
cos_theta, -sin_theta, # a, b
sin_theta, cos_theta, # d, e
centroid_x - cos_theta * centroid_x + sin_theta * centroid_y, # xoff
centroid_y - sin_theta * centroid_x - cos_theta * centroid_y # yoff
]
# Rotate the original geometries
rotated_geometries = filtered_segmentation_df['Original_Geometry'].apply(lambda geom: affine_transform(geom, rotation_matrix))
# Update the dataframe with the rotated geometries
filtered_segmentation_df['Geometry'] = rotated_geometries
# Plot the rotated sample
fig, ax = plt.subplots()
patches_list = []
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
cluster = str(row['clusters']) # Convert cluster to string to match custom colors dict keys
# Assign color based on the custom mapping, with a default fallback
color = custom_cluster_colors.get(cluster, default_color)
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
patches_list.append(patch)
elif isinstance(shape, Polygon):
patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
patches_list.append(patch)
# Adjust axis limits as needed
ax.set_xlim([3400, 4500]) # Update if necessary after rotation
ax.set_ylim([6200, 8000]) # Update if necessary after rotation
ax.set_aspect('equal')
# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
# Add the scale bar
scale_bar_length_microns = 500 # Length of scale bar in microns
scale_bar_x_start = 3700 # X-coordinate for the start of the scale bar
scale_bar_y_start = 6300 # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns # Assuming 1 unit = 1 micron
# Add scale bar as a line
scale_bar = Line2D(
[scale_bar_x_start, scale_bar_x_start + scale_bar_length_units],
[scale_bar_y_start, scale_bar_y_start],
color="black",
linewidth=2
)
ax.add_line(scale_bar)
# Add label for scale bar
ax.text(
scale_bar_x_start + scale_bar_length_units / 2,
scale_bar_y_start - 20, # Adjust to place label below the bar
f"{scale_bar_length_microns} µm",
color="black",
ha="center",
va="top",
fontsize=8
)
plt.savefig('leidenclustering/VGN1e1_domainmap_500scalebar.png', bbox_inches='tight', dpi=700, format='png', transparent=False)
plt.show()
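The hand-built affine matrix above rotates each geometry by 20° counterclockwise about the sample centroid. shapely also exposes this directly via shapely.affinity.rotate, which handles the origin shift internally; a hedged alternative sketch that should give the same result:
# Sketch: the same rotation about the sample centroid using shapely.affinity.rotate
from shapely.affinity import rotate
centroid = ((min_x + max_x) / 2, (min_y + max_y) / 2)
rotated_geometries = filtered_segmentation_df['Original_Geometry'].apply(
    lambda geom: rotate(geom, rotation_angle, origin=centroid)  # angle in degrees, CCW
)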
Mapping late double ridge MUT¶
# Filter to create a new AnnData object with only the sample 'VGN1e9'
adata_VGN1e9 = adata_spatial[adata_spatial.obs['dataset'] == 'VGN1e9'].copy()
adata_VGN1e9.obs.index = [index.split('-')[0] for index in adata_VGN1e9.obs.index]
# Convert adata_VGN1e9.obs to a dataframe and reset the index to make the index a column
adata_VGN1e9_df = adata_VGN1e9.obs.reset_index()
# Rename the index column if necessary (for example, to 'cell_id')
adata_VGN1e9_df = adata_VGN1e9_df.rename(columns={'index': 'cell_id'})
# Load segmentation data
segmentation_df = gpd.read_parquet("cell_segmentation/VGN1e_region9_output/cellpose2_micron_space_VGN1e9.parquet")
#filter segmentation data to only include high quality, analysed cells
#list the cell ids we want to include
adata_VGN1e9_df['cell_id'] = adata_VGN1e9_df['cell_id'].astype(str).str.strip()
cell_ids = adata_VGN1e9_df['cell_id'].tolist()
cell_ids = [cell_id.strip() for cell_id in cell_ids]
# Convert EntityID in filtered_segmentation_df to strings to match with cell_ids
filtered_segmentation_df = segmentation_df.copy()
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
# Now filter based on matching values in cell_ids
filtered_segmentation_df = filtered_segmentation_df[filtered_segmentation_df['EntityID'].isin(cell_ids)]
# Include cluster information in the dataframe
adata_VGN1e9_df = adata_VGN1e9.obs[['clusters']].copy()
adata_VGN1e9_df.index = adata_VGN1e9_df.index.astype(str) # Ensure the index is a string if necessary
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
filtered_segmentation_df = filtered_segmentation_df.merge(adata_VGN1e9_df, left_on='EntityID', right_index=True)
filtered_segmentation_df
| | ID | EntityID | ZIndex | Geometry | ParentType | ParentID | Type | ZLevel | Name | clusters |
|---|---|---|---|---|---|---|---|---|---|---|
| 56991 | 56991 | 2655507000046100147 | 3 | MULTIPOLYGON (((6828.874 8057.041, 6828.979 80... | None | None | cell | 6.0 | None | 6 |
| 57911 | 57911 | 2655507000046100147 | 5 | MULTIPOLYGON (((6828.874 8057.041, 6828.979 80... | None | None | cell | 9.0 | None | 6 |
| 57451 | 57451 | 2655507000046100147 | 4 | MULTIPOLYGON (((6828.874 8057.041, 6828.979 80... | None | None | cell | 7.5 | None | 6 |
| 58141 | 58141 | 2655507000046100147 | 0 | MULTIPOLYGON (((6828.874 8057.041, 6828.979 80... | None | None | cell | 1.5 | None | 6 |
| 57681 | 57681 | 2655507000046100147 | 1 | MULTIPOLYGON (((6828.874 8057.041, 6828.979 80... | None | None | cell | 3.0 | None | 6 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 15423 | 15423 | 2655507000138100178 | 5 | MULTIPOLYGON (((7305.041 9536.552, 7308.216 95... | None | None | cell | 9.0 | None | 0 |
| 15163 | 15163 | 2655507000138100178 | 3 | MULTIPOLYGON (((7305.041 9536.552, 7308.216 95... | None | None | cell | 6.0 | None | 0 |
| 15358 | 15358 | 2655507000138100178 | 1 | MULTIPOLYGON (((7305.041 9536.552, 7308.216 95... | None | None | cell | 3.0 | None | 0 |
| 15553 | 15553 | 2655507000138100178 | 6 | MULTIPOLYGON (((7305.041 9536.552, 7308.216 95... | None | None | cell | 10.5 | None | 0 |
| 15488 | 15488 | 2655507000138100178 | 0 | MULTIPOLYGON (((7305.041 9536.552, 7308.216 95... | None | None | cell | 1.5 | None | 0 |
10983 rows × 10 columns
# I need to look into the region min and max values so I know where to set the axes of graph
# Variables to track min and max x, y
min_x, min_y = float('inf'), float('inf')
max_x, max_y = float('-inf'), float('-inf')
# Iterate through the geometries
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
if isinstance(shape, (Polygon, MultiPolygon)):
# Get the bounding box of the shape
bounds = shape.bounds # Returns (min_x, min_y, max_x, max_y)
min_x = min(min_x, bounds[0])
min_y = min(min_y, bounds[1])
max_x = max(max_x, bounds[2])
max_y = max(max_y, bounds[3])
# Round to the nearest 1's place
min_x, min_y, max_x, max_y = round(min_x), round(min_y), round(max_x), round(max_y)
# Print the rounded min and max values for x and y
print(f"Min X: {min_x}, Max X: {max_x}")
print(f"Min Y: {min_y}, Max Y: {max_y}")
Min X: 6525, Max X: 7400 Min Y: 7978, Max Y: 9548
fig, ax = plt.subplots()
patches_list = []
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
cluster = str(row['clusters']) # Convert cluster to string to match custom colors dict keys
# Assign color based on the custom mapping, with a default fallback
color = custom_cluster_colors.get(cluster, default_color)
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.1)
patches_list.append(patch)
elif isinstance(shape, Polygon):
patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.1)
patches_list.append(patch)
# Set axis limits
ax.set_xlim([6700, 7500])
ax.set_ylim([8000, 9600])
ax.set_aspect('equal')
# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
plt.show()
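A legend relating cluster numbers to colours could be added just before plt.show() in the cell above; a sketch, assuming custom_cluster_colors is the cluster-to-colour dict used for the facecolors:
# Sketch: build one legend handle per cluster colour (added before plt.show())
handles = [patches.Patch(facecolor=colour, label=f"Cluster {cluster}")
           for cluster, colour in custom_cluster_colors.items()]
ax.legend(handles=handles, loc='center left', bbox_to_anchor=(1.02, 0.5),
          fontsize=6, frameon=False)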
# Ensure the original geometries are preserved
if 'Original_Geometry' not in filtered_segmentation_df.columns:
filtered_segmentation_df['Original_Geometry'] = filtered_segmentation_df['Geometry']
# Define rotation angle in degrees
rotation_angle = 20
angle_rad = np.radians(rotation_angle)
# Calculate the rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)
# Get the centroid of the entire sample as the rotation origin
centroid_x = (min_x + max_x) / 2 # Use the previously computed min_x, max_x
centroid_y = (min_y + max_y) / 2 # Use the previously computed min_y, max_y
# Rotation matrix for Shapely's affine_transform (a, b, d, e, xoff, yoff)
rotation_matrix = [
cos_theta, -sin_theta, # a, b
sin_theta, cos_theta, # d, e
centroid_x - cos_theta * centroid_x + sin_theta * centroid_y, # xoff
centroid_y - sin_theta * centroid_x - cos_theta * centroid_y # yoff
]
# Rotate the original geometries
rotated_geometries = filtered_segmentation_df['Original_Geometry'].apply(lambda geom: affine_transform(geom, rotation_matrix))
# Update the dataframe with the rotated geometries
filtered_segmentation_df['Geometry'] = rotated_geometries
# Plot the rotated sample
fig, ax = plt.subplots()
patches_list = []
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
cluster = str(row['clusters']) # Convert cluster to string to match custom colors dict keys
# Assign color based on the custom mapping, with a default fallback
color = custom_cluster_colors.get(cluster, default_color)
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
patches_list.append(patch)
elif isinstance(shape, Polygon):
patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
patches_list.append(patch)
# Adjust axis limits as needed
ax.set_xlim([6700, 7400])
ax.set_ylim([8200, 9700])
ax.set_aspect('equal')
# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
# Add the scale bar
scale_bar_length_microns = 500 # Length of scale bar in microns
scale_bar_x_start = 6720 # X-coordinate for the start of the scale bar
scale_bar_y_start = 8350 # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns # Assuming 1 unit = 1 micron
# Add scale bar as a line
scale_bar = Line2D(
[scale_bar_x_start, scale_bar_x_start + scale_bar_length_units],
[scale_bar_y_start, scale_bar_y_start],
color="black",
linewidth=2
)
ax.add_line(scale_bar)
# Add label for scale bar
ax.text(
scale_bar_x_start + scale_bar_length_units / 2,
scale_bar_y_start - 20, # Adjust to place label below the bar
f"{scale_bar_length_microns} µm",
color="black",
ha="center",
va="top",
fontsize=8
)
plt.savefig('leidenclustering/VGN1e9_domainmap_500scalebar.png', dpi=700, bbox_inches='tight', format='png', transparent=False)
plt.show()
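The hand-built affine matrix above should be equivalent to Shapely's rotate helper (positive angles are counter-clockwise); a sketch of the same rotation about the sample centre:
# Sketch: rotate each original geometry by the same angle around the sample centroid
from shapely.affinity import rotate
rotated_geometries = filtered_segmentation_df['Original_Geometry'].apply(
    lambda geom: rotate(geom, rotation_angle, origin=(centroid_x, centroid_y))
)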
Mapping lemma primordia WT¶
# Filter to create a new AnnData object with only the sample 'VGN1b6'
adata_VGN1b6 = adata_spatial[adata_spatial.obs['dataset'] == 'VGN1b6'].copy()
adata_VGN1b6.obs.index = [index.split('-')[0] for index in adata_VGN1b6.obs.index]
# Convert adata_VGN1b6.obs to a dataframe and reset the index to make the index a column
adata_VGN1b6_df = adata_VGN1b6.obs.reset_index()
# Rename the index column if necessary (for example, to 'cell_id')
adata_VGN1b6_df = adata_VGN1b6_df.rename(columns={'index': 'cell_id'})
# Load segmentation data
segmentation_df = gpd.read_parquet("cell_segmentation/VGN1b_region6_output/cellpose2_micron_space_VGN1b6.parquet")
# Filter segmentation data to only include high-quality, analysed cells
# List the cell ids we want to include
adata_VGN1b6_df['cell_id'] = adata_VGN1b6_df['cell_id'].astype(str).str.strip()
cell_ids = adata_VGN1b6_df['cell_id'].tolist()
cell_ids = [cell_id.strip() for cell_id in cell_ids]
# Convert EntityID in filtered_segmentation_df to strings to match with cell_ids
filtered_segmentation_df = segmentation_df.copy()
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
# Now filter based on matching values in cell_ids
filtered_segmentation_df = filtered_segmentation_df[filtered_segmentation_df['EntityID'].isin(cell_ids)]
# Include cluster information in the dataframe
adata_VGN1b6_df = adata_VGN1b6.obs[['clusters']].copy()
adata_VGN1b6_df.index = adata_VGN1b6_df.index.astype(str) # Ensure the index is a string if necessary
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
filtered_segmentation_df = filtered_segmentation_df.merge(adata_VGN1b6_df, left_on='EntityID', right_index=True)
filtered_segmentation_df
| | ID | EntityID | ZIndex | Geometry | ParentType | ParentID | Type | ZLevel | Name | clusters |
|---|---|---|---|---|---|---|---|---|---|---|
| 18274 | 18274 | 2293012600006100168 | 1 | MULTIPOLYGON (((5555.024 4413.845, 5555.399 44... | None | None | cell | 3.0 | None | 4 |
| 17778 | 17778 | 2293012600006100168 | 2 | MULTIPOLYGON (((5555.024 4413.845, 5555.399 44... | None | None | cell | 4.5 | None | 4 |
| 18770 | 18770 | 2293012600006100168 | 0 | MULTIPOLYGON (((5555.024 4413.845, 5555.399 44... | None | None | cell | 1.5 | None | 4 |
| 17530 | 17530 | 2293012600006100168 | 3 | MULTIPOLYGON (((5555.024 4413.845, 5555.399 44... | None | None | cell | 6.0 | None | 4 |
| 19018 | 19018 | 2293012600006100168 | 6 | MULTIPOLYGON (((5555.024 4413.845, 5555.399 44... | None | None | cell | 10.5 | None | 4 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 62520 | 62520 | 2293012600036100142 | 6 | MULTIPOLYGON (((5452.244 6030.988, 5452.287 60... | None | None | cell | 10.5 | None | 9 |
| 61842 | 61842 | 2293012600036100142 | 3 | MULTIPOLYGON (((5452.244 6030.988, 5452.287 60... | None | None | cell | 6.0 | None | 9 |
| 61955 | 61955 | 2293012600036100142 | 2 | MULTIPOLYGON (((5452.244 6030.988, 5452.287 60... | None | None | cell | 4.5 | None | 9 |
| 62294 | 62294 | 2293012600036100142 | 5 | MULTIPOLYGON (((5452.244 6030.988, 5452.287 60... | None | None | cell | 9.0 | None | 9 |
| 62068 | 62068 | 2293012600036100142 | 4 | MULTIPOLYGON (((5452.244 6030.988, 5452.287 60... | None | None | cell | 7.5 | None | 9 |
20272 rows × 10 columns
# I need to look into the region min and max values so I know where to set the axes of graph
# Variables to track min and max x, y
min_x, min_y = float('inf'), float('inf')
max_x, max_y = float('-inf'), float('-inf')
# Iterate through the geometries
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
if isinstance(shape, (Polygon, MultiPolygon)):
# Get the bounding box of the shape
bounds = shape.bounds # Returns (min_x, min_y, max_x, max_y)
min_x = min(min_x, bounds[0])
min_y = min(min_y, bounds[1])
max_x = max(max_x, bounds[2])
max_y = max(max_y, bounds[3])
# Round to the nearest 1's place
min_x, min_y, max_x, max_y = round(min_x), round(min_y), round(max_x), round(max_y)
# Print the rounded min and max values for x and y
print(f"Min X: {min_x}, Max X: {max_x}")
print(f"Min Y: {min_y}, Max Y: {max_y}")
Min X: 5003, Max X: 5796
Min Y: 4333, Max Y: 6075
fig, ax = plt.subplots()
patches_list = []
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
cluster = str(row['clusters']) # Convert cluster to string to match custom colors dict keys
# Assign color based on the custom mapping, with a default fallback
color = custom_cluster_colors.get(cluster, default_color)
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.1)
patches_list.append(patch)
elif isinstance(shape, Polygon):
patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.1)
patches_list.append(patch)
# Set axis limits
ax.set_xlim([4800, 5900])
ax.set_ylim([4200, 6200])
ax.set_aspect('equal')
# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
plt.show()
# Ensure the original geometries are preserved
if 'Original_Geometry' not in filtered_segmentation_df.columns:
filtered_segmentation_df['Original_Geometry'] = filtered_segmentation_df['Geometry']
# Define rotation angle in degrees
rotation_angle = 165
angle_rad = np.radians(rotation_angle)
# Calculate the rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)
# Get the centroid of the entire sample as the rotation origin
centroid_x = (min_x + max_x) / 2 # Use the previously computed min_x, max_x
centroid_y = (min_y + max_y) / 2 # Use the previously computed min_y, max_y
# Rotation matrix for Shapely's affine_transform (a, b, d, e, xoff, yoff)
rotation_matrix = [
cos_theta, -sin_theta, # a, b
sin_theta, cos_theta, # d, e
centroid_x - cos_theta * centroid_x + sin_theta * centroid_y, # xoff
centroid_y - sin_theta * centroid_x - cos_theta * centroid_y # yoff
]
# Rotate the original geometries
rotated_geometries = filtered_segmentation_df['Original_Geometry'].apply(lambda geom: affine_transform(geom, rotation_matrix))
# Update the dataframe with the rotated geometries
filtered_segmentation_df['Geometry'] = rotated_geometries
# Plot the rotated sample
fig, ax = plt.subplots()
patches_list = []
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
cluster = str(row['clusters']) # Convert cluster to string to match custom colors dict keys
# Assign color based on the custom mapping, with a default fallback
color = custom_cluster_colors.get(cluster, default_color)
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
patches_list.append(patch)
elif isinstance(shape, Polygon):
patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
patches_list.append(patch)
# Adjust axis limits as needed
ax.set_xlim([4800, 5900])
ax.set_ylim([4200, 6200])
ax.set_aspect('equal')
# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
# Add the scale bar
scale_bar_length_microns = 500 # Length of scale bar in microns
scale_bar_x_start = 4830 # X-coordinate for the start of the scale bar
scale_bar_y_start = 4350 # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns # Assuming 1 unit = 1 micron
# Add scale bar as a line
scale_bar = Line2D(
[scale_bar_x_start, scale_bar_x_start + scale_bar_length_units],
[scale_bar_y_start, scale_bar_y_start],
color="black",
linewidth=2
)
ax.add_line(scale_bar)
# Add label for scale bar
ax.text(
scale_bar_x_start + scale_bar_length_units / 2,
scale_bar_y_start - 20, # Adjust to place label below the bar
f"{scale_bar_length_microns} µm",
color="black",
ha="center",
va="top",
fontsize=8
)
plt.savefig('leidenclustering/VGN1b6_domainmap_500scalebar.png', dpi=1000, bbox_inches='tight', format='png', transparent=False)
plt.show()
I also want to make a domain map for the specific ridges
# Define custom colors for specific clusters
custom_cluster_colors2 = {
'1': '#e10e3c',
'2': '#f47140',
'15': '#e170c0'
}
# Ensure the original geometries are preserved
if 'Original_Geometry' not in filtered_segmentation_df.columns:
filtered_segmentation_df['Original_Geometry'] = filtered_segmentation_df['Geometry']
# Define rotation angle in degrees
rotation_angle = 165
angle_rad = np.radians(rotation_angle)
# Calculate the rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)
# Get the centroid of the entire sample as the rotation origin
centroid_x = (min_x + max_x) / 2 # Use the previously computed min_x, max_x
centroid_y = (min_y + max_y) / 2 # Use the previously computed min_y, max_y
# Rotation matrix for Shapely's affine_transform (a, b, d, e, xoff, yoff)
rotation_matrix = [
cos_theta, -sin_theta, # a, b
sin_theta, cos_theta, # d, e
centroid_x - cos_theta * centroid_x + sin_theta * centroid_y, # xoff
centroid_y - sin_theta * centroid_x - cos_theta * centroid_y # yoff
]
# Rotate the original geometries
rotated_geometries = filtered_segmentation_df['Original_Geometry'].apply(lambda geom: affine_transform(geom, rotation_matrix))
# Update the dataframe with the rotated geometries
filtered_segmentation_df['Geometry'] = rotated_geometries
# Plot the rotated sample
fig, ax = plt.subplots()
patches_list = []
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
cluster = str(row['clusters']) # Convert cluster to string to match custom colors dict keys
# Assign color based on the custom mapping, with a default fallback
color = custom_cluster_colors2.get(cluster, default_color)
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='#595959', linewidth=0.5)
patches_list.append(patch)
elif isinstance(shape, Polygon):
patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='#595959', linewidth=0.5)
patches_list.append(patch)
# Adjust axis limits as needed
ax.set_xlim([5400, 5800])
ax.set_ylim([4800, 5290])
ax.set_aspect('equal')
# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
# Add the scale bar
scale_bar_length_microns = 100 # Length of scale bar in microns
scale_bar_x_start = 5410 # X-coordinate for the start of the scale bar
scale_bar_y_start = 4810 # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns # Assuming 1 unit = 1 micron
# Add scale bar as a line
scale_bar = Line2D(
[scale_bar_x_start, scale_bar_x_start + scale_bar_length_units],
[scale_bar_y_start, scale_bar_y_start],
color="black",
linewidth=2
)
ax.add_line(scale_bar)
plt.savefig('leidenclustering/VGN1b6_spikelettissues_100scalebar.png', dpi=700, bbox_inches='tight', format='png', transparent=False)
plt.show()
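The patch-building and plotting boilerplate above repeats for every sample; it could be wrapped in a small helper function. A sketch with illustrative names (the fallback colour is an assumption, not taken from the original analysis):
def plot_domain_map(df, cluster_colors, xlim, ylim, fallback='whitesmoke',
                    edgecolor='white', linewidth=0.0):
    # Draw one polygon patch per cell, coloured by its cluster assignment
    fig, ax = plt.subplots()
    patches_list = []
    for _, row in df.iterrows():
        colour = cluster_colors.get(str(row['clusters']), fallback)
        shape = row['Geometry']
        polys = shape.geoms if isinstance(shape, MultiPolygon) else [shape]
        for poly in polys:
            patches_list.append(patches.Polygon(list(poly.exterior.coords), closed=True,
                                                facecolor=colour, edgecolor=edgecolor,
                                                linewidth=linewidth))
    ax.add_collection(PatchCollection(patches_list, match_original=True))
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    ax.set_aspect('equal')
    return fig, ax
# Example use, reproducing the ridge map above:
# fig, ax = plot_domain_map(filtered_segmentation_df, custom_cluster_colors2, [5400, 5800], [4800, 5290])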
Mapping lemma primordia MUT¶
# Filter to create a new AnnData object with only the sample 'VGN1b8'
adata_VGN1b8 = adata_spatial[adata_spatial.obs['dataset'] == 'VGN1b8'].copy()
adata_VGN1b8.obs.index = [index.split('-')[0] for index in adata_VGN1b8.obs.index]
# Convert adata_VGN1b8.obs to a dataframe and reset the index to make the index a column
adata_VGN1b8_df = adata_VGN1b8.obs.reset_index()
# Rename the index column if necessary (for example, to 'cell_id')
adata_VGN1b8_df = adata_VGN1b8_df.rename(columns={'index': 'cell_id'})
# Load segmentation data
segmentation_df = gpd.read_parquet("cell_segmentation/VGN1b_region8_output/cellpose2_micron_space_VGN1b8.parquet")
# Filter segmentation data to only include high-quality, analysed cells
# List the cell ids we want to include
adata_VGN1b8_df['cell_id'] = adata_VGN1b8_df['cell_id'].astype(str).str.strip()
cell_ids = adata_VGN1b8_df['cell_id'].tolist()
cell_ids = [cell_id.strip() for cell_id in cell_ids]
# Convert EntityID in filtered_segmentation_df to strings to match with cell_ids
filtered_segmentation_df = segmentation_df.copy()
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
# Now filter based on matching values in cell_ids
filtered_segmentation_df = filtered_segmentation_df[filtered_segmentation_df['EntityID'].isin(cell_ids)]
# Include cluster information in the dataframe
adata_VGN1b8_df = adata_VGN1b8.obs[['clusters']].copy()
adata_VGN1b8_df.index = adata_VGN1b8_df.index.astype(str) # Ensure the index is a string if necessary
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
filtered_segmentation_df = filtered_segmentation_df.merge(adata_VGN1b8_df, left_on='EntityID', right_index=True)
filtered_segmentation_df
| | ID | EntityID | ZIndex | Geometry | ParentType | ParentID | Type | ZLevel | Name | clusters |
|---|---|---|---|---|---|---|---|---|---|---|
| 43190 | 43190 | 2654750700021100077 | 2 | MULTIPOLYGON (((1641.195 6262.854, 1641.434 62... | None | None | cell | 4.5 | None | 4 |
| 44086 | 44086 | 2654750700021100077 | 0 | MULTIPOLYGON (((1641.195 6262.854, 1641.434 62... | None | None | cell | 1.5 | None | 4 |
| 44310 | 44310 | 2654750700021100077 | 6 | MULTIPOLYGON (((1641.195 6262.854, 1641.434 62... | None | None | cell | 10.5 | None | 4 |
| 43414 | 43414 | 2654750700021100077 | 4 | MULTIPOLYGON (((1641.195 6262.854, 1641.434 62... | None | None | cell | 7.5 | None | 4 |
| 43638 | 43638 | 2654750700021100077 | 1 | MULTIPOLYGON (((1641.195 6262.854, 1641.434 62... | None | None | cell | 3.0 | None | 4 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 16813 | 16813 | 2654750700061100059 | 0 | MULTIPOLYGON (((1407.301 7467.331, 1407.942 74... | None | None | cell | 1.5 | None | 9 |
| 16783 | 16783 | 2654750700061100059 | 5 | MULTIPOLYGON (((1407.301 7467.331, 1407.942 74... | None | None | cell | 9.0 | None | 9 |
| 16663 | 16663 | 2654750700061100059 | 3 | MULTIPOLYGON (((1407.301 7467.331, 1407.942 74... | None | None | cell | 6.0 | None | 9 |
| 16723 | 16723 | 2654750700061100059 | 4 | MULTIPOLYGON (((1407.301 7467.331, 1407.942 74... | None | None | cell | 7.5 | None | 9 |
| 16843 | 16843 | 2654750700061100059 | 6 | MULTIPOLYGON (((1407.301 7467.331, 1407.942 74... | None | None | cell | 10.5 | None | 9 |
14749 rows × 10 columns
# I need to look into the region min and max values so I know where to set the axes of graph
# Variables to track min and max x, y
min_x, min_y = float('inf'), float('inf')
max_x, max_y = float('-inf'), float('-inf')
# Iterate through the geometries
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
if isinstance(shape, (Polygon, MultiPolygon)):
# Get the bounding box of the shape
bounds = shape.bounds # Returns (min_x, min_y, max_x, max_y)
min_x = min(min_x, bounds[0])
min_y = min(min_y, bounds[1])
max_x = max(max_x, bounds[2])
max_y = max(max_y, bounds[3])
# Round to the nearest 1's place
min_x, min_y, max_x, max_y = round(min_x), round(min_y), round(max_x), round(max_y)
# Print the rounded min and max values for x and y
print(f"Min X: {min_x}, Max X: {max_x}")
print(f"Min Y: {min_y}, Max Y: {max_y}")
Min X: 884, Max X: 1809
Min Y: 6243, Max Y: 7527
fig, ax = plt.subplots()
patches_list = []
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
cluster = str(row['clusters']) # Convert cluster to string to match custom colors dict keys
# Assign color based on the custom mapping, with a default fallback
color = custom_cluster_colors.get(cluster, default_color)
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.1)
patches_list.append(patch)
elif isinstance(shape, Polygon):
patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.1)
patches_list.append(patch)
# Set axis limits
ax.set_xlim([800, 1900])
ax.set_ylim([6100, 7600])
ax.set_aspect('equal')
# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
plt.show()
# Ensure the original geometries are preserved
if 'Original_Geometry' not in filtered_segmentation_df.columns:
filtered_segmentation_df['Original_Geometry'] = filtered_segmentation_df['Geometry']
# Define rotation angle in degrees
rotation_angle = 155
angle_rad = np.radians(rotation_angle)
# Calculate the rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)
# Get the centroid of the entire sample as the rotation origin
centroid_x = (min_x + max_x) / 2 # Use the previously computed min_x, max_x
centroid_y = (min_y + max_y) / 2 # Use the previously computed min_y, max_y
# Rotation matrix for Shapely's affine_transform (a, b, d, e, xoff, yoff)
rotation_matrix = [
cos_theta, -sin_theta, # a, b
sin_theta, cos_theta, # d, e
centroid_x - cos_theta * centroid_x + sin_theta * centroid_y, # xoff
centroid_y - sin_theta * centroid_x - cos_theta * centroid_y # yoff
]
# Rotate the original geometries
rotated_geometries = filtered_segmentation_df['Original_Geometry'].apply(lambda geom: affine_transform(geom, rotation_matrix))
# Update the dataframe with the rotated geometries
filtered_segmentation_df['Geometry'] = rotated_geometries
# Plot the rotated sample
fig, ax = plt.subplots()
patches_list = []
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
cluster = str(row['clusters']) # Convert cluster to string to match custom colors dict keys
# Assign color based on the custom mapping, with a default fallback
color = custom_cluster_colors.get(cluster, default_color)
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
patches_list.append(patch)
elif isinstance(shape, Polygon):
patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
patches_list.append(patch)
# Adjust axis limits as needed
ax.set_xlim([800, 1900])
ax.set_ylim([6100, 7800])
ax.set_aspect('equal')
# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
# Add the scale bar
scale_bar_length_microns = 500 # Length of scale bar in microns
scale_bar_x_start = 830 # X-coordinate for the start of the scale bar
scale_bar_y_start = 6250 # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns # Assuming 1 unit = 1 micron
# Add scale bar as a line
scale_bar = Line2D(
[scale_bar_x_start, scale_bar_x_start + scale_bar_length_units],
[scale_bar_y_start, scale_bar_y_start],
color="black",
linewidth=2
)
ax.add_line(scale_bar)
# Add label for scale bar
ax.text(
scale_bar_x_start + scale_bar_length_units / 2,
scale_bar_y_start - 20, # Adjust to place label below the bar
f"{scale_bar_length_microns} µm",
color="black",
ha="center",
va="top",
fontsize=8
)
plt.savefig('leidenclustering/VGN1b8_domainmap_500scalebar.png', dpi=1000, bbox_inches='tight', format='png', transparent=True)
plt.show()
Mapping terminal spikelet WT¶
# Filter to create a new AnnData object with only the sample 'VGN1a6'
adata_VGN1a6 = adata_spatial[adata_spatial.obs['dataset'] == 'VGN1a6'].copy()
adata_VGN1a6.obs.index = [index.split('-')[0] for index in adata_VGN1a6.obs.index]
# Convert adata_VGN1a6.obs to a dataframe and reset the index to make the index a column
adata_VGN1a6_df = adata_VGN1a6.obs.reset_index()
# Rename the index column if necessary (for example, to 'cell_id')
adata_VGN1a6_df = adata_VGN1a6_df.rename(columns={'index': 'cell_id'})
# Load segmentation data
segmentation_df = gpd.read_parquet("cell_segmentation/VGN1a_region6_output/cellpose2_micron_space_VGN1a6.parquet")
# Filter segmentation data to only include high-quality, analysed cells
# List the cell ids we want to include
adata_VGN1a6_df['cell_id'] = adata_VGN1a6_df['cell_id'].astype(str).str.strip()
cell_ids = adata_VGN1a6_df['cell_id'].tolist()
cell_ids = [cell_id.strip() for cell_id in cell_ids]
# Convert EntityID in filtered_segmentation_df to strings to match with cell_ids
filtered_segmentation_df = segmentation_df.copy()
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
# Now filter based on matching values in cell_ids
filtered_segmentation_df = filtered_segmentation_df[filtered_segmentation_df['EntityID'].isin(cell_ids)]
# Include cluster information in the dataframe
adata_VGN1a6_df = adata_VGN1a6.obs[['clusters']].copy()
adata_VGN1a6_df.index = adata_VGN1a6_df.index.astype(str) # Ensure the index is a string if necessary
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
filtered_segmentation_df = filtered_segmentation_df.merge(adata_VGN1a6_df, left_on='EntityID', right_index=True)
filtered_segmentation_df
| | ID | EntityID | ZIndex | Geometry | ParentType | ParentID | Type | ZLevel | Name | clusters |
|---|---|---|---|---|---|---|---|---|---|---|
| 14985 | 14985 | 2305551000002100247 | 6 | MULTIPOLYGON (((740.840 5156.623, 742.057 5157... | None | None | cell | 10.5 | None | 0 |
| 14772 | 14772 | 2305551000002100247 | 0 | MULTIPOLYGON (((740.840 5156.623, 742.057 5157... | None | None | cell | 1.5 | None | 0 |
| 14346 | 14346 | 2305551000002100247 | 1 | MULTIPOLYGON (((740.840 5156.623, 742.057 5157... | None | None | cell | 3.0 | None | 0 |
| 13920 | 13920 | 2305551000002100247 | 2 | MULTIPOLYGON (((740.840 5156.623, 742.057 5157... | None | None | cell | 4.5 | None | 0 |
| 14133 | 14133 | 2305551000002100247 | 4 | MULTIPOLYGON (((740.840 5156.623, 742.057 5157... | None | None | cell | 7.5 | None | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 12713 | 12713 | 2305551000055100230 | 2 | MULTIPOLYGON (((311.576 7442.294, 316.129 7442... | None | None | cell | 4.5 | None | 1 |
| 12608 | 12608 | 2305551000055100230 | 3 | MULTIPOLYGON (((311.576 7442.294, 316.129 7442... | None | None | cell | 6.0 | None | 1 |
| 13133 | 13133 | 2305551000055100230 | 0 | MULTIPOLYGON (((311.576 7442.294, 316.129 7442... | None | None | cell | 1.5 | None | 1 |
| 13238 | 13238 | 2305551000055100230 | 6 | MULTIPOLYGON (((311.576 7442.294, 316.129 7442... | None | None | cell | 10.5 | None | 1 |
| 13028 | 13028 | 2305551000055100230 | 5 | MULTIPOLYGON (((311.576 7442.294, 316.129 7442... | None | None | cell | 9.0 | None | 1 |
37835 rows × 10 columns
# I need to look into the region min and max values so I know where to set the axes of graph
# Variables to track min and max x, y
min_x, min_y = float('inf'), float('inf')
max_x, max_y = float('-inf'), float('-inf')
# Iterate through the geometries
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
if isinstance(shape, (Polygon, MultiPolygon)):
# Get the bounding box of the shape
bounds = shape.bounds # Returns (min_x, min_y, max_x, max_y)
min_x = min(min_x, bounds[0])
min_y = min(min_y, bounds[1])
max_x = max(max_x, bounds[2])
max_y = max(max_y, bounds[3])
# Round to the nearest 1's place
min_x, min_y, max_x, max_y = round(min_x), round(min_y), round(max_x), round(max_y)
# Print the rounded min and max values for x and y
print(f"Min X: {min_x}, Max X: {max_x}")
print(f"Min Y: {min_y}, Max Y: {max_y}")
Min X: 178, Max X: 1475
Min Y: 5150, Max Y: 7524
fig, ax = plt.subplots()
patches_list = []
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
cluster = str(row['clusters']) # Convert cluster to string to match custom colors dict keys
# Assign color based on the custom mapping, with a default fallback
color = custom_cluster_colors.get(cluster, default_color)
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.1)
patches_list.append(patch)
elif isinstance(shape, Polygon):
patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.1)
patches_list.append(patch)
# Set axis limits
ax.set_xlim([100, 1550])
ax.set_ylim([5000, 7603])
ax.set_aspect('equal')
# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
plt.show()
# Ensure the original geometries are preserved
if 'Original_Geometry' not in filtered_segmentation_df.columns:
filtered_segmentation_df['Original_Geometry'] = filtered_segmentation_df['Geometry']
# Define rotation angle in degrees
rotation_angle = -17
angle_rad = np.radians(rotation_angle)
# Calculate the rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)
# Get the centroid of the entire sample as the rotation origin
centroid_x = (min_x + max_x) / 2 # Use the previously computed min_x, max_x
centroid_y = (min_y + max_y) / 2 # Use the previously computed min_y, max_y
# Rotation matrix for Shapely's affine_transform (a, b, d, e, xoff, yoff)
rotation_matrix = [
cos_theta, -sin_theta, # a, b
sin_theta, cos_theta, # d, e
centroid_x - cos_theta * centroid_x + sin_theta * centroid_y, # xoff
centroid_y - sin_theta * centroid_x - cos_theta * centroid_y # yoff
]
# Rotate the original geometries
rotated_geometries = filtered_segmentation_df['Original_Geometry'].apply(lambda geom: affine_transform(geom, rotation_matrix))
# Update the dataframe with the rotated geometries
filtered_segmentation_df['Geometry'] = rotated_geometries
# Plot the rotated sample
fig, ax = plt.subplots()
patches_list = []
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
cluster = str(row['clusters']) # Convert cluster to string to match custom colors dict keys
# Assign color based on the custom mapping, with a default fallback
color = custom_cluster_colors.get(cluster, default_color)
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
patches_list.append(patch)
elif isinstance(shape, Polygon):
patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
patches_list.append(patch)
# Adjust axis limits as needed
ax.set_xlim([100, 1550])
ax.set_ylim([4800, 7750])
ax.set_aspect('equal')
# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
# Add the scale bar
scale_bar_length_microns = 500 # Length of scale bar in microns
scale_bar_x_start = 140 # X-coordinate for the start of the scale bar
scale_bar_y_start = 5100 # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns # Assuming 1 unit = 1 micron
# Add scale bar as a line
scale_bar = Line2D(
[scale_bar_x_start, scale_bar_x_start + scale_bar_length_units],
[scale_bar_y_start, scale_bar_y_start],
color="black",
linewidth=2
)
ax.add_line(scale_bar)
# Add label for scale bar
ax.text(
scale_bar_x_start + scale_bar_length_units / 2,
scale_bar_y_start - 20, # Adjust to place label below the bar
f"{scale_bar_length_microns} µm",
color="black",
ha="center",
va="top",
fontsize=8
)
plt.savefig('leidenclustering/VGN1a6_domainmap_500scalebar.png', dpi=700, bbox_inches='tight', format='png', transparent=False)
plt.show()
Mapping terminal spikelet MUT¶
# Filter to create a new AnnData object with only the sample 'VGN1a4'
adata_VGN1a4 = adata_spatial[adata_spatial.obs['dataset'] == 'VGN1a4'].copy()
adata_VGN1a4.obs.index = [index.split('-')[0] for index in adata_VGN1a4.obs.index]
# Convert adata_VGN1a4.obs to a dataframe and reset the index to make the index a column
adata_VGN1a4_df = adata_VGN1a4.obs.reset_index()
# Rename the index column if necessary (for example, to 'cell_id')
adata_VGN1a4_df = adata_VGN1a4_df.rename(columns={'index': 'cell_id'})
# Load segmentation data
segmentation_df = gpd.read_parquet("cell_segmentation/VGN1a_region4_output/cellpose2_micron_space_VGN1a4.parquet")
# Filter segmentation data to only include high-quality, analysed cells
# List the cell ids we want to include
adata_VGN1a4_df['cell_id'] = adata_VGN1a4_df['cell_id'].astype(str).str.strip()
cell_ids = adata_VGN1a4_df['cell_id'].tolist()
cell_ids = [cell_id.strip() for cell_id in cell_ids]
# Convert EntityID in filtered_segmentation_df to strings to match with cell_ids
filtered_segmentation_df = segmentation_df.copy()
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
# Now filter based on matching values in cell_ids
filtered_segmentation_df = filtered_segmentation_df[filtered_segmentation_df['EntityID'].isin(cell_ids)]
# Include cluster information in the dataframe
adata_VGN1a4_df = adata_VGN1a4.obs[['clusters']].copy()
adata_VGN1a4_df.index = adata_VGN1a4_df.index.astype(str) # Ensure the index is a string if necessary
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
filtered_segmentation_df = filtered_segmentation_df.merge(adata_VGN1a4_df, left_on='EntityID', right_index=True)
filtered_segmentation_df
| | ID | EntityID | ZIndex | Geometry | ParentType | ParentID | Type | ZLevel | Name | clusters |
|---|---|---|---|---|---|---|---|---|---|---|
| 12211 | 12211 | 2655350200002100079 | 6 | MULTIPOLYGON (((3014.803 5892.732, 3014.595 59... | None | None | cell | 10.5 | None | 9 |
| 11707 | 11707 | 2655350200002100079 | 5 | MULTIPOLYGON (((3014.803 5892.732, 3014.595 59... | None | None | cell | 9.0 | None | 9 |
| 10951 | 10951 | 2655350200002100079 | 2 | MULTIPOLYGON (((3014.803 5892.732, 3014.595 59... | None | None | cell | 4.5 | None | 9 |
| 10699 | 10699 | 2655350200002100079 | 3 | MULTIPOLYGON (((3014.803 5892.732, 3014.595 59... | None | None | cell | 6.0 | None | 9 |
| 11203 | 11203 | 2655350200002100079 | 4 | MULTIPOLYGON (((3014.803 5892.732, 3014.595 59... | None | None | cell | 7.5 | None | 9 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 10456 | 10456 | 2655350200055100164 | 1 | MULTIPOLYGON (((2682.259 8226.024, 2682.387 82... | None | None | cell | 3.0 | None | 16 |
| 10393 | 10393 | 2655350200055100164 | 4 | MULTIPOLYGON (((2682.259 8226.024, 2682.387 82... | None | None | cell | 7.5 | None | 16 |
| 10267 | 10267 | 2655350200055100164 | 3 | MULTIPOLYGON (((2682.259 8226.024, 2682.387 82... | None | None | cell | 6.0 | None | 16 |
| 10645 | 10645 | 2655350200055100164 | 6 | MULTIPOLYGON (((2682.259 8226.024, 2682.387 82... | None | None | cell | 10.5 | None | 16 |
| 10519 | 10519 | 2655350200055100164 | 5 | MULTIPOLYGON (((2682.259 8226.024, 2682.387 82... | None | None | cell | 9.0 | None | 16 |
44695 rows × 10 columns
# I need to look into the region min and max values so I know where to set the axes of graph
# Variables to track min and max x, y
min_x, min_y = float('inf'), float('inf')
max_x, max_y = float('-inf'), float('-inf')
# Iterate through the geometries
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
if isinstance(shape, (Polygon, MultiPolygon)):
# Get the bounding box of the shape
bounds = shape.bounds # Returns (min_x, min_y, max_x, max_y)
min_x = min(min_x, bounds[0])
min_y = min(min_y, bounds[1])
max_x = max(max_x, bounds[2])
max_y = max(max_y, bounds[3])
# Round to the nearest 1's place
min_x, min_y, max_x, max_y = round(min_x), round(min_y), round(max_x), round(max_y)
# Print the rounded min and max values for x and y
print(f"Min X: {min_x}, Max X: {max_x}")
print(f"Min Y: {min_y}, Max Y: {max_y}")
Min X: 2401, Max X: 3781
Min Y: 5887, Max Y: 8298
fig, ax = plt.subplots()
patches_list = []
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
cluster = str(row['clusters']) # Convert cluster to string to match custom colors dict keys
# Assign color based on the custom mapping, with a default fallback
color = custom_cluster_colors.get(cluster, default_color)
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.1)
patches_list.append(patch)
elif isinstance(shape, Polygon):
patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.1)
patches_list.append(patch)
# Set axis limits
ax.set_xlim([2300, 3800])
ax.set_ylim([5700, 8400])
ax.set_aspect('equal')
# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
plt.show()
# Ensure the original geometries are preserved
if 'Original_Geometry' not in filtered_segmentation_df.columns:
filtered_segmentation_df['Original_Geometry'] = filtered_segmentation_df['Geometry']
# Define rotation angle in degrees
rotation_angle = -20
angle_rad = np.radians(rotation_angle)
# Calculate the rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)
# Get the centroid of the entire sample as the rotation origin
centroid_x = (min_x + max_x) / 2 # Use the previously computed min_x, max_x
centroid_y = (min_y + max_y) / 2 # Use the previously computed min_y, max_y
# Rotation matrix for Shapely's affine_transform (a, b, d, e, xoff, yoff)
rotation_matrix = [
cos_theta, -sin_theta, # a, b
sin_theta, cos_theta, # d, e
centroid_x - cos_theta * centroid_x + sin_theta * centroid_y, # xoff
centroid_y - sin_theta * centroid_x - cos_theta * centroid_y # yoff
]
# Rotate the original geometries
rotated_geometries = filtered_segmentation_df['Original_Geometry'].apply(lambda geom: affine_transform(geom, rotation_matrix))
# Update the dataframe with the rotated geometries
filtered_segmentation_df['Geometry'] = rotated_geometries
# Plot the rotated sample
fig, ax = plt.subplots()
patches_list = []
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
cluster = str(row['clusters']) # Convert cluster to string to match custom colors dict keys
# Assign color based on the custom mapping, with a default fallback
color = custom_cluster_colors.get(cluster, default_color)
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
patches_list.append(patch)
elif isinstance(shape, Polygon):
patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
patches_list.append(patch)
# Adjust axis limits as needed
ax.set_xlim([2300, 3800])
ax.set_ylim([5700, 8400])
ax.set_aspect('equal')
# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
# Add the scale bar
scale_bar_length_microns = 500 # Length of scale bar in microns
scale_bar_x_start = 2390 # X-coordinate for the start of the scale bar
scale_bar_y_start = 5900 # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns # Assuming 1 unit = 1 micron
# Add scale bar as a line
scale_bar = Line2D(
[scale_bar_x_start, scale_bar_x_start + scale_bar_length_units],
[scale_bar_y_start, scale_bar_y_start],
color="black",
linewidth=2
)
ax.add_line(scale_bar)
# Add label for scale bar
ax.text(
scale_bar_x_start + scale_bar_length_units / 2,
scale_bar_y_start - 20, # Adjust to place label below the bar
f"{scale_bar_length_microns} µm",
color="black",
ha="center",
va="top",
fontsize=8
)
plt.savefig('leidenclustering/VGN1a4_domainmap_500scalebar.png', dpi=1000, bbox_inches='tight', format='png', transparent=False)
plt.show()
Carpel extension round WT¶
# Filter to create a new AnnData object with only the sample 'VGN1c2'
adata_VGN1c2 = adata_spatial[adata_spatial.obs['dataset'] == 'VGN1c2'].copy()
adata_VGN1c2.obs.index = [index.split('-')[0] for index in adata_VGN1c2.obs.index]
# Convert adata_VGN1c2.obs to a dataframe and reset the index to make the index a column
adata_VGN1c2_df = adata_VGN1c2.obs.reset_index()
# Rename the index column if necessary (for example, to 'cell_id')
adata_VGN1c2_df = adata_VGN1c2_df.rename(columns={'index': 'cell_id'})
# Load segmentation data
segmentation_df = gpd.read_parquet("cell_segmentation/VGN1c_region2_output/cellpose2_micron_space_VGN1c2.parquet")
# Filter segmentation data to only include high-quality, analysed cells
# List the cell ids we want to include
adata_VGN1c2_df['cell_id'] = adata_VGN1c2_df['cell_id'].astype(str).str.strip()
cell_ids = adata_VGN1c2_df['cell_id'].tolist()
cell_ids = [cell_id.strip() for cell_id in cell_ids]
# Convert EntityID in filtered_segmentation_df to strings to match with cell_ids
filtered_segmentation_df = segmentation_df.copy()
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
# Now filter based on matching values in cell_ids
filtered_segmentation_df = filtered_segmentation_df[filtered_segmentation_df['EntityID'].isin(cell_ids)]
# Include cluster information in the dataframe
adata_VGN1c2_df = adata_VGN1c2.obs[['clusters']].copy()
adata_VGN1c2_df.index = adata_VGN1c2_df.index.astype(str) # Ensure the index is a string if necessary
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
filtered_segmentation_df = filtered_segmentation_df.merge(adata_VGN1c2_df, left_on='EntityID', right_index=True)
filtered_segmentation_df
| | ID | EntityID | ZIndex | Geometry | ParentType | ParentID | Type | ZLevel | Name | clusters |
|---|---|---|---|---|---|---|---|---|---|---|
| 26701 | 26701 | 2293599400000100091 | 0 | MULTIPOLYGON (((2147.053 2954.890, 2148.056 29... | None | None | cell | 1.5 | None | 14 |
| 26885 | 26885 | 2293599400000100091 | 6 | MULTIPOLYGON (((2147.053 2954.890, 2148.056 29... | None | None | cell | 10.5 | None | 14 |
| 26517 | 26517 | 2293599400000100091 | 5 | MULTIPOLYGON (((2147.053 2954.890, 2148.056 29... | None | None | cell | 9.0 | None | 14 |
| 25965 | 25965 | 2293599400000100091 | 2 | MULTIPOLYGON (((2147.053 2954.890, 2148.056 29... | None | None | cell | 4.5 | None | 14 |
| 26333 | 26333 | 2293599400000100091 | 1 | MULTIPOLYGON (((2147.053 2954.890, 2148.056 29... | None | None | cell | 3.0 | None | 14 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 222308 | 222308 | 2293599400212100353 | 4 | MULTIPOLYGON (((5975.232 6093.677, 5975.280 60... | None | None | cell | 7.5 | None | 4 |
| 222102 | 222102 | 2293599400212100353 | 2 | MULTIPOLYGON (((5975.232 6093.677, 5975.280 60... | None | None | cell | 4.5 | None | 4 |
| 222720 | 222720 | 2293599400212100353 | 5 | MULTIPOLYGON (((5975.232 6093.677, 5975.280 60... | None | None | cell | 9.0 | None | 4 |
| 221896 | 221896 | 2293599400212100353 | 3 | MULTIPOLYGON (((5975.232 6093.677, 5975.280 60... | None | None | cell | 6.0 | None | 4 |
| 222926 | 222926 | 2293599400212100353 | 0 | MULTIPOLYGON (((5975.232 6093.677, 5975.280 60... | None | None | cell | 1.5 | None | 4 |
106295 rows × 10 columns
# I need to look into the region min and max values so I know where to set the axes of graph
# Variables to track min and max x, y
min_x, min_y = float('inf'), float('inf')
max_x, max_y = float('-inf'), float('-inf')
# Iterate through the geometries
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
if isinstance(shape, (Polygon, MultiPolygon)):
# Get the bounding box of the shape
bounds = shape.bounds # Returns (min_x, min_y, max_x, max_y)
min_x = min(min_x, bounds[0])
min_y = min(min_y, bounds[1])
max_x = max(max_x, bounds[2])
max_y = max(max_y, bounds[3])
# Round to the nearest 1's place
min_x, min_y, max_x, max_y = round(min_x), round(min_y), round(max_x), round(max_y)
# Print the rounded min and max values for x and y
print(f"Min X: {min_x}, Max X: {max_x}")
print(f"Min Y: {min_y}, Max Y: {max_y}")
Min X: 2060, Max X: 6522
Min Y: 2921, Max Y: 6114
fig, ax = plt.subplots()
patches_list = []
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
cluster = str(row['clusters']) # Convert cluster to string to match custom colors dict keys
# Assign color based on the custom mapping, with a default fallback
color = custom_cluster_colors.get(cluster, default_color)
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.1)
patches_list.append(patch)
elif isinstance(shape, Polygon):
patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.1)
patches_list.append(patch)
# Set axis limits
ax.set_xlim([1900, 6650])
ax.set_ylim([2700, 6300])
ax.set_aspect('equal')
# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
plt.show()
# Ensure the original geometries are preserved
if 'Original_Geometry' not in filtered_segmentation_df.columns:
filtered_segmentation_df['Original_Geometry'] = filtered_segmentation_df['Geometry']
# Define rotation angle in degrees
rotation_angle = -124
angle_rad = np.radians(rotation_angle)
# Calculate the rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)
# Get the centroid of the entire sample as the rotation origin
centroid_x = (min_x + max_x) / 2 # Use the previously computed min_x, max_x
centroid_y = (min_y + max_y) / 2 # Use the previously computed min_y, max_y
# Rotation matrix for Shapely's affine_transform (a, b, d, e, xoff, yoff)
rotation_matrix = [
cos_theta, -sin_theta, # a, b
sin_theta, cos_theta, # d, e
centroid_x - cos_theta * centroid_x + sin_theta * centroid_y, # xoff
centroid_y - sin_theta * centroid_x - cos_theta * centroid_y # yoff
]
# Rotate the original geometries
rotated_geometries = filtered_segmentation_df['Original_Geometry'].apply(lambda geom: affine_transform(geom, rotation_matrix))
# Update the dataframe with the rotated geometries
filtered_segmentation_df['Geometry'] = rotated_geometries
# Plot the rotated sample
fig, ax = plt.subplots()
patches_list = []
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
cluster = str(row['clusters']) # Convert cluster to string to match custom colors dict keys
# Assign color based on the custom mapping, with a default fallback
color = custom_cluster_colors.get(cluster, default_color)
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
patches_list.append(patch)
elif isinstance(shape, Polygon):
patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
patches_list.append(patch)
# Adjust axis limits as needed
ax.set_xlim([3000, 5500])
ax.set_ylim([1600, 7400])
ax.set_aspect('equal')
# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
# Add the scale bar
scale_bar_length_microns = 500 # Length of scale bar in microns
scale_bar_x_start = 3500 # X-coordinate for the start of the scale bar
scale_bar_y_start = 2000 # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns # Assuming 1 unit = 1 micron
# Add scale bar as a line
scale_bar = Line2D(
[scale_bar_x_start, scale_bar_x_start + scale_bar_length_units],
[scale_bar_y_start, scale_bar_y_start],
color="black",
linewidth=2
)
ax.add_line(scale_bar)
# Add label for scale bar
ax.text(
scale_bar_x_start + scale_bar_length_units / 2,
scale_bar_y_start - 30, # Adjust to place label below the bar
f"{scale_bar_length_microns} µm",
color="black",
ha="center",
va="top",
fontsize=8
)
plt.savefig('leidenclustering/VGN1c2_domainmap_500scalebar.png', dpi=1000, bbox_inches='tight', format='png', transparent=False)
plt.show()
Making a domain map of floral tissues for Figure 2
custom_cluster_colors2 = {
'1': '#e10e3c',
'2': '#f47140',
'15': '#e170c0'
}
default_color = 'whitesmoke'  # fallback for clusters not listed in custom_cluster_colors2
# Ensure the original geometries are preserved
if 'Original_Geometry' not in filtered_segmentation_df.columns:
filtered_segmentation_df['Original_Geometry'] = filtered_segmentation_df['Geometry']
# Define rotation angle in degrees
rotation_angle = -160
angle_rad = np.radians(rotation_angle)
# Calculate the rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)
# Get the centroid of the entire sample as the rotation origin
centroid_x = (min_x + max_x) / 2 # Use the previously computed min_x, max_x
centroid_y = (min_y + max_y) / 2 # Use the previously computed min_y, max_y
# Rotation matrix for Shapely's affine_transform (a, b, d, e, xoff, yoff)
rotation_matrix = [
cos_theta, -sin_theta, # a, b
sin_theta, cos_theta, # d, e
centroid_x - cos_theta * centroid_x + sin_theta * centroid_y, # xoff
centroid_y - sin_theta * centroid_x - cos_theta * centroid_y # yoff
]
# Rotate the original geometries
rotated_geometries = filtered_segmentation_df['Original_Geometry'].apply(lambda geom: affine_transform(geom, rotation_matrix))
# Update the dataframe with the rotated geometries
filtered_segmentation_df['Geometry'] = rotated_geometries
# Plot the rotated sample
fig, ax = plt.subplots()
patches_list = []
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
cluster = str(row['clusters']) # Convert cluster to string to match custom colors dict keys
# Assign color based on the custom mapping, with a default fallback
color = custom_cluster_colors2.get(cluster, default_color)
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='#595959', linewidth=0.15)
patches_list.append(patch)
elif isinstance(shape, Polygon):
patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='#595959', linewidth=0.15)
patches_list.append(patch)
# Adjust axis limits as needed
ax.set_xlim([3900, 4400])
ax.set_ylim([4400, 5500])
ax.set_aspect('equal')
# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
# Add the scale bar
scale_bar_length_microns = 100 # Length of scale bar in microns
scale_bar_x_start = 4000 # X-coordinate for the start of the scale bar
scale_bar_y_start = 4500 # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns # Assuming 1 unit = 1 micron
# Add scale bar as a line
scale_bar = Line2D(
[scale_bar_x_start, scale_bar_x_start + scale_bar_length_units],
[scale_bar_y_start, scale_bar_y_start],
color="black",
linewidth=2
)
ax.add_line(scale_bar)
plt.savefig('leidenclustering/VGN1c2_domainmap_floraltissues_100scalebar.png', dpi=700, bbox_inches='tight', format='png', transparent=False)
plt.show()
Now I am making a heatmap of gene expression, used for Figure 1
combined_cells_summary_matrix = pd.read_csv('qc/cellmetadata_genecounts_transcriptcounts.csv')
filtered_segmentation_df2 = filtered_segmentation_df.copy()
combined_cells_summary_matrix['EntityID'] = combined_cells_summary_matrix['EntityID'].astype(str)
filtered_segmentation_df2['EntityID'] = filtered_segmentation_df2['EntityID'].astype(str)
# Merge dataframes on 'EntityID', keeping only the necessary column
filtered_segmentation_df2 = filtered_segmentation_df2.merge(
combined_cells_summary_matrix[['EntityID', 'TraesCS6A02G259000']],
on='EntityID',
how='left'
)
# Rotation logic
if 'Original_Geometry' not in filtered_segmentation_df2.columns:
filtered_segmentation_df2['Original_Geometry'] = filtered_segmentation_df2['Geometry']
# Define rotation angle in degrees
rotation_angle = -160
angle_rad = np.radians(rotation_angle)
# Calculate the rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)
# Get the centroid of the entire sample as the rotation origin
centroid_x = (min_x + max_x) / 2 # Use the previously computed min_x, max_x
centroid_y = (min_y + max_y) / 2 # Use the previously computed min_y, max_y
# Rotation matrix for Shapely's affine_transform (a, b, d, e, xoff, yoff)
rotation_matrix = [
cos_theta, -sin_theta, # a, b
sin_theta, cos_theta, # d, e
centroid_x - cos_theta * centroid_x + sin_theta * centroid_y, # xoff
centroid_y - sin_theta * centroid_x - cos_theta * centroid_y # yoff
]
# Rotate the original geometries
rotated_geometries = filtered_segmentation_df2['Original_Geometry'].apply(
lambda geom: affine_transform(geom, rotation_matrix)
)
filtered_segmentation_df2['Geometry'] = rotated_geometries
# Initialize colormap
colormap = cm.get_cmap('viridis') # You can use other colormaps like 'plasma' or 'cividis'
norm = colors.Normalize(vmin=0, vmax=37)
# Plot the heatmap
fig, ax = plt.subplots()
patches_list = []
for _, row in filtered_segmentation_df2.iterrows():
shape = row['Geometry']
expression_value = row['TraesCS6A02G259000']  # raw transcript count for this cell
color = colormap(norm(expression_value))
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
patch = patches.Polygon(
list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.15
)
patches_list.append(patch)
elif isinstance(shape, Polygon):
patch = patches.Polygon(
list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.15
)
patches_list.append(patch)
# Adjust axis limits if needed
ax.set_xlim([3900, 4400])
ax.set_ylim([4500, 4950])
ax.set_aspect('equal')
# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
# Add the scale bar
scale_bar_length_microns = 250 # Length of scale bar in microns
scale_bar_x_start = 4100 # X-coordinate for the start of the scale bar
scale_bar_y_start = 4525 # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns # Assuming 1 unit = 1 micron
scale_bar = Line2D(
[scale_bar_x_start, scale_bar_x_start + scale_bar_length_microns],
[scale_bar_y_start, scale_bar_y_start],
color="white",
linewidth=2
)
ax.add_line(scale_bar)
# Add colorbar
plt.colorbar(cm.ScalarMappable(norm=norm, cmap=colormap), ax=ax, label="Expression Level")
ax.set_facecolor('black')
plt.savefig('leidenclustering/VGN1c2_heatmapAGL6_floraltissues_100scalebar.png', dpi=700, bbox_inches='tight', format='png')
plt.show()
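The colour scale above uses a hard-coded vmax of 37; an alternative is to derive the upper limit from the data itself, for example a high quantile of the counts (a sketch, gene column as used above):
# Sketch: clip the colour scale at the 99th percentile of the observed counts
vmax = filtered_segmentation_df2['TraesCS6A02G259000'].quantile(0.99)
norm = colors.Normalize(vmin=0, vmax=vmax)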
plt.style.use('dark_background')
# Rotation logic
if 'Original_Geometry' not in filtered_segmentation_df2.columns:
filtered_segmentation_df2['Original_Geometry'] = filtered_segmentation_df2['Geometry']
# Define rotation angle in degrees
rotation_angle = -160
angle_rad = np.radians(rotation_angle)
# Calculate the rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)
# Get the centroid of the entire sample as the rotation origin
centroid_x = (min_x + max_x) / 2 # Use the previously computed min_x, max_x
centroid_y = (min_y + max_y) / 2 # Use the previously computed min_y, max_y
# Rotation matrix for Shapely's affine_transform (a, b, d, e, xoff, yoff)
rotation_matrix = [
cos_theta, -sin_theta, # a, b
sin_theta, cos_theta, # d, e
centroid_x - cos_theta * centroid_x + sin_theta * centroid_y, # xoff
centroid_y - sin_theta * centroid_x - cos_theta * centroid_y # yoff
]
# Rotate the original geometries
rotated_geometries = filtered_segmentation_df2['Original_Geometry'].apply(
lambda geom: affine_transform(geom, rotation_matrix)
)
filtered_segmentation_df2['Geometry'] = rotated_geometries
# Initialize colormap
colormap = cm.get_cmap('viridis') # You can use other colormaps like 'plasma' or 'cividis'
norm = colors.Normalize(vmin=0, vmax=37)
# Plot the heatmap
fig, ax = plt.subplots()
patches_list = []
for _, row in filtered_segmentation_df2.iterrows():
shape = row['Geometry']
expression_value = row['TraesCS6A02G259000']  # raw transcript count for this cell
color = colormap(norm(expression_value))
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
patch = patches.Polygon(
list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.15
)
patches_list.append(patch)
elif isinstance(shape, Polygon):
patch = patches.Polygon(
list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.15
)
patches_list.append(patch)
# Adjust axis limits if needed
ax.set_xlim([3900, 4400])
ax.set_ylim([4500, 4950])
ax.set_aspect('equal')
# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
# Add the scale bar
scale_bar_length_microns = 250 # Length of scale bar in microns
scale_bar_x_start = 4100 # X-coordinate for the start of the scale bar
scale_bar_y_start = 4525 # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns # Assuming 1 unit = 1 micron
scale_bar = Line2D(
[scale_bar_x_start, scale_bar_x_start + scale_bar_length_microns],
[scale_bar_y_start, scale_bar_y_start],
color="white",
linewidth=2
)
ax.add_line(scale_bar)
# Add colorbar
plt.colorbar(cm.ScalarMappable(norm=norm, cmap=colormap), ax=ax, label="Expression Level")
ax.set_facecolor('black')
plt.savefig('leidenclustering/VGN1c2_heatmapAGL6_floraltissues_100scalebar_darkbackground.png', dpi=700, bbox_inches='tight', format='png')
plt.show()
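As an aside, shapely's built-in rotation helper gives the same transform without assembling the affine matrix by hand; a minimal sketch, assuming the same angle and centroid origin used above:
# Equivalent rotation via shapely.affinity.rotate (sketch, not used above);
# rotate() takes the angle in degrees and an explicit origin, so no manual
# matrix components or offsets are needed.
from shapely.affinity import rotate
rotated_geometries_alt = filtered_segmentation_df2['Original_Geometry'].apply(
    lambda geom: rotate(geom, rotation_angle, origin=(centroid_x, centroid_y))
)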
Carpel extension round MUT¶
# Filter to create a new AnnData object with only the sample 'VGN1c3'
adata_VGN1c3 = adata_spatial[adata_spatial.obs['dataset'] == 'VGN1c3'].copy()
adata_VGN1c3.obs.index = [index.split('-')[0] for index in adata_VGN1c3.obs.index]
# Convert adata_VGN1c3.obs to a dataframe and reset the index to make the index a column
adata_VGN1c3_df = adata_VGN1c3.obs.reset_index()
# Rename the index column if necessary (for example, to 'cell_id')
adata_VGN1c3_df = adata_VGN1c3_df.rename(columns={'index': 'cell_id'})
# Load segmentation data
segmentation_df = gpd.read_parquet("cell_segmentation/VGN1c_region3_output/cellpose2_micron_space_VGN1c3.parquet")
# Filter segmentation data to only include high-quality, analysed cells
# List the cell IDs we want to include
adata_VGN1c3_df['cell_id'] = adata_VGN1c3_df['cell_id'].astype(str).str.strip()
cell_ids = adata_VGN1c3_df['cell_id'].tolist()
cell_ids = [cell_id.strip() for cell_id in cell_ids]
# Convert EntityID in filtered_segmentation_df to strings to match with cell_ids
filtered_segmentation_df = segmentation_df.copy()
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
# Now filter based on matching values in cell_ids
filtered_segmentation_df = filtered_segmentation_df[filtered_segmentation_df['EntityID'].isin(cell_ids)]
# Include cluster information in the dataframe
adata_VGN1c3_df = adata_VGN1c3.obs[['clusters']].copy()
adata_VGN1c3_df.index = adata_VGN1c3_df.index.astype(str) # Ensure the index is a string if necessary
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
filtered_segmentation_df = filtered_segmentation_df.merge(adata_VGN1c3_df, left_on='EntityID', right_index=True)
filtered_segmentation_df
| | ID | EntityID | ZIndex | Geometry | ParentType | ParentID | Type | ZLevel | Name | clusters |
|---|---|---|---|---|---|---|---|---|---|---|
| 30064 | 30064 | 2654449700017100069 | 4 | MULTIPOLYGON (((1833.951 5009.770, 1834.890 50... | None | None | cell | 7.5 | None | 1 |
| 30397 | 30397 | 2654449700017100069 | 0 | MULTIPOLYGON (((1833.951 5009.770, 1834.890 50... | None | None | cell | 1.5 | None | 1 |
| 29953 | 29953 | 2654449700017100069 | 2 | MULTIPOLYGON (((1833.951 5009.770, 1834.890 50... | None | None | cell | 4.5 | None | 1 |
| 30286 | 30286 | 2654449700017100069 | 5 | MULTIPOLYGON (((1833.951 5009.770, 1834.890 50... | None | None | cell | 9.0 | None | 1 |
| 29842 | 29842 | 2654449700017100069 | 3 | MULTIPOLYGON (((1833.951 5009.770, 1834.890 50... | None | None | cell | 6.0 | None | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 27037 | 27037 | 2654449700218100189 | 3 | MULTIPOLYGON (((5778.878 7867.979, 5780.133 78... | None | None | cell | 6.0 | None | 13 |
| 27119 | 27119 | 2654449700218100189 | 2 | MULTIPOLYGON (((5778.878 7867.979, 5780.133 78... | None | None | cell | 4.5 | None | 13 |
| 27365 | 27365 | 2654449700218100189 | 5 | MULTIPOLYGON (((5778.878 7867.979, 5780.133 78... | None | None | cell | 9.0 | None | 13 |
| 27201 | 27201 | 2654449700218100189 | 4 | MULTIPOLYGON (((5778.878 7867.979, 5780.133 78... | None | None | cell | 7.5 | None | 13 |
| 27283 | 27283 | 2654449700218100189 | 1 | MULTIPOLYGON (((5778.878 7867.979, 5780.133 78... | None | None | cell | 3.0 | None | 13 |
104825 rows × 10 columns
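The load-filter-merge steps above are repeated for every sample in this notebook; a hypothetical helper (names are illustrative, not part of the pipeline) could wrap them:
# Sketch of a reusable helper for the per-sample filter-and-merge pattern.
# Assumes the segmentation parquet has an 'EntityID' column and the AnnData
# obs index holds the matching cell ids (suffix after '-' stripped).
def attach_clusters(adata_all, sample, segmentation_path):
    adata = adata_all[adata_all.obs['dataset'] == sample].copy()
    adata.obs.index = [i.split('-')[0] for i in adata.obs.index]
    seg = gpd.read_parquet(segmentation_path)
    seg['EntityID'] = seg['EntityID'].astype(str)
    seg = seg[seg['EntityID'].isin(adata.obs.index.astype(str))]
    clusters = adata.obs[['clusters']].copy()
    clusters.index = clusters.index.astype(str)
    return seg.merge(clusters, left_on='EntityID', right_index=True)
# e.g. attach_clusters(adata_spatial, 'VGN1c3',
#                      'cell_segmentation/VGN1c_region3_output/cellpose2_micron_space_VGN1c3.parquet')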
# Check the region's min and max coordinate values so I know where to set the graph axes
# Variables to track min and max x, y
min_x, min_y = float('inf'), float('inf')
max_x, max_y = float('-inf'), float('-inf')
# Iterate through the geometries
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
if isinstance(shape, (Polygon, MultiPolygon)):
# Get the bounding box of the shape
bounds = shape.bounds # Returns (min_x, min_y, max_x, max_y)
min_x = min(min_x, bounds[0])
min_y = min(min_y, bounds[1])
max_x = max(max_x, bounds[2])
max_y = max(max_y, bounds[3])
# Round to the nearest 1's place
min_x, min_y, max_x, max_y = round(min_x), round(min_y), round(max_x), round(max_y)
# Print the rounded min and max values for x and y
print(f"Min X: {min_x}, Max X: {max_x}")
print(f"Min Y: {min_y}, Max Y: {max_y}")
Min X: 1814, Max X: 6184
Min Y: 4929, Max Y: 7967
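GeoPandas can return the same bounding box in one call; a quick equivalent (the GeoSeries wrapper guards against the merge having returned a plain pandas column):
# Same bounds via GeoPandas total_bounds (minx, miny, maxx, maxy)
min_x, min_y, max_x, max_y = (
    gpd.GeoSeries(filtered_segmentation_df['Geometry']).total_bounds.round().astype(int)
)
print(f"Min X: {min_x}, Max X: {max_x}")
print(f"Min Y: {min_y}, Max Y: {max_y}")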
plt.style.use('default')
fig, ax = plt.subplots()
patches_list = []
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
cluster = str(row['clusters']) # Convert cluster to string to match custom colors dict keys
# Assign color based on the custom mapping, with a default fallback
color = custom_cluster_colors.get(cluster, default_color)
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.1)
patches_list.append(patch)
elif isinstance(shape, Polygon):
patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.1)
patches_list.append(patch)
# Set axis limits
ax.set_xlim([1700, 6300])
ax.set_ylim([4700, 8100])
ax.set_aspect('equal')
# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
plt.show()
# Ensure the original geometries are preserved
if 'Original_Geometry' not in filtered_segmentation_df.columns:
filtered_segmentation_df['Original_Geometry'] = filtered_segmentation_df['Geometry']
# Define rotation angle in degrees
rotation_angle = -124
angle_rad = np.radians(rotation_angle)
# Calculate the rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)
# Get the centroid of the entire sample as the rotation origin
centroid_x = (min_x + max_x) / 2 # Use the previously computed min_x, max_x
centroid_y = (min_y + max_y) / 2 # Use the previously computed min_y, max_y
# Rotation matrix for Shapely's affine_transform (a, b, d, e, xoff, yoff)
rotation_matrix = [
cos_theta, -sin_theta, # a, b
sin_theta, cos_theta, # d, e
centroid_x - cos_theta * centroid_x + sin_theta * centroid_y, # xoff
centroid_y - sin_theta * centroid_x - cos_theta * centroid_y # yoff
]
# Rotate the original geometries
rotated_geometries = filtered_segmentation_df['Original_Geometry'].apply(lambda geom: affine_transform(geom, rotation_matrix))
# Update the dataframe with the rotated geometries
filtered_segmentation_df['Geometry'] = rotated_geometries
# Plot the rotated sample
fig, ax = plt.subplots()
patches_list = []
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
cluster = str(row['clusters']) # Convert cluster to string to match custom colors dict keys
# Assign color based on the custom mapping, with a default fallback
color = custom_cluster_colors.get(cluster, default_color)
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
patches_list.append(patch)
elif isinstance(shape, Polygon):
patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
patches_list.append(patch)
# Adjust axis limits as needed
ax.set_xlim([1700, 6300])
ax.set_ylim([3500, 9000])
ax.set_aspect('equal')
# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
# Add the scale bar
scale_bar_length_microns = 500 # Length of scale bar in microns
scale_bar_x_start = 3300 # X-coordinate for the start of the scale bar
scale_bar_y_start = 3900 # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns # Assuming 1 unit = 1 micron
# Add scale bar as a line
scale_bar = Line2D(
[scale_bar_x_start, scale_bar_x_start + scale_bar_length_units],
[scale_bar_y_start, scale_bar_y_start],
color="black",
linewidth=2
)
ax.add_line(scale_bar)
# Add label for scale bar
ax.text(
scale_bar_x_start + scale_bar_length_units / 2,
scale_bar_y_start - 30, # Adjust to place label below the bar
f"{scale_bar_length_microns} µm",
color="black",
ha="center",
va="top",
fontsize=8
)
plt.savefig('leidenclustering/VGN1c3_domainmap_500scalebar.png', dpi=1000, bbox_inches='tight', format='png')
plt.show()
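The session also includes matplotlib_scalebar, so the hand-drawn bar and label could be replaced by a ScaleBar artist; a minimal sketch, assuming 1 data unit = 1 µm as above:
# Alternative scale bar via matplotlib-scalebar (sketch, not used above);
# ScaleBar picks a round bar length automatically from the axis extent.
# Add it before plt.savefig() so it appears in the exported figure.
from matplotlib_scalebar.scalebar import ScaleBar
scalebar = ScaleBar(1, units="um", location="lower left", frameon=False, color="black")
ax.add_artist(scalebar)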
Visualising marker genes with spatial maps, annotated with transcripts¶
segmentation_df = gpd.read_parquet('cell_segmentation/VGN1e_region1_output/cellpose2_micron_space_VGN1e1.parquet')
transcripts_df = pd.read_csv('cell_segmentation/VGN1e_region1_output/detected_transcripts_VGN1e1.csv')
# Filter to create a new AnnData object with only the sample 'VGN1e1' with my cluster information included
adata_VGN1e1 = adata_spatial[adata_spatial.obs['dataset'] == 'VGN1e1'].copy()
adata_VGN1e1.obs.index = [index.split('-')[0] for index in adata_VGN1e1.obs.index]
# Convert adata_VGN1e1.obs to a dataframe and reset the index to make the index a column
adata_VGN1e1_df = adata_VGN1e1.obs.reset_index()
# Extract id for all cells in my VGN1e1 sample
adata_VGN1e1_df = adata_VGN1e1_df.rename(columns={'index': 'cell_id'})
adata_VGN1e1_df['cell_id'] = adata_VGN1e1_df['cell_id'].astype(str).str.strip()
cell_ids = adata_VGN1e1_df['cell_id'].tolist()
cell_ids = [cell_id.strip() for cell_id in cell_ids]
# Convert EntityID in filtered_segmentation_df to strings to match with cell_ids
filtered_segmentation_df = segmentation_df.copy()
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
# Now filter based on matching values in cell_ids
filtered_segmentation_df = filtered_segmentation_df[filtered_segmentation_df['EntityID'].isin(cell_ids)]
# Merge the segmentation information with the cluster information
adata_VGN1e1_df = adata_VGN1e1.obs[['clusters']].copy()
adata_VGN1e1_df.index = adata_VGN1e1_df.index.astype(str) # Ensure the index is a string if necessary
merged_segmentation_df = filtered_segmentation_df.merge(adata_VGN1e1_df, left_on='EntityID', right_index=True)
merged_segmentation_df
| | ID | EntityID | ZIndex | Geometry | ParentType | ParentID | Type | ZLevel | Name | clusters |
|---|---|---|---|---|---|---|---|---|---|---|
| 48334 | 48334 | 2343479300017100075 | 1 | MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... | None | None | cell | 3.0 | None | 6 |
| 48829 | 48829 | 2343479300017100075 | 6 | MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... | None | None | cell | 10.5 | None | 6 |
| 48664 | 48664 | 2343479300017100075 | 0 | MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... | None | None | cell | 1.5 | None | 6 |
| 48499 | 48499 | 2343479300017100075 | 5 | MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... | None | None | cell | 9.0 | None | 6 |
| 48004 | 48004 | 2343479300017100075 | 2 | MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... | None | None | cell | 4.5 | None | 6 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 28175 | 28175 | 2343479300098100014 | 2 | MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... | None | None | cell | 4.5 | None | 12 |
| 28195 | 28195 | 2343479300098100014 | 0 | MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... | None | None | cell | 1.5 | None | 12 |
| 28200 | 28200 | 2343479300098100014 | 6 | MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... | None | None | cell | 10.5 | None | 12 |
| 28185 | 28185 | 2343479300098100014 | 1 | MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... | None | None | cell | 3.0 | None | 12 |
| 28170 | 28170 | 2343479300098100014 | 3 | MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... | None | None | cell | 6.0 | None | 12 |
15463 rows × 10 columns
# Now I also want to filter the transcripts, because many aren't relevant here and they slow down computation
# I will keep only the transcripts that fall within the cells of interest
# Ensures that filtered_segmentation_df is a GeoDataFrame with geometries representing spatial areas (e.g., polygons).
filtered_segmentation_df = gpd.GeoDataFrame(filtered_segmentation_df, geometry='Geometry')
# Converts transcripts_df into a GeoDataFrame where each transcript is represented as a point geometry derived from its global_x and global_y coordinates.
transcripts_gdf = gpd.GeoDataFrame(
transcripts_df,
geometry=gpd.points_from_xy(transcripts_df['global_x'], transcripts_df['global_y'])
)
# Matches transcript points (transcripts_gdf) to spatial regions (filtered_segmentation_df) using a spatial join (predicate='within').
filtered_transcripts_gdf = gpd.sjoin(
transcripts_gdf,
filtered_segmentation_df[['EntityID', 'Geometry']],
how='inner',
predicate='within'
)
# Remove unnecessary columns and duplicates
filtered_transcripts_df = (
filtered_transcripts_gdf
.drop(columns=['index_right', 'Unnamed: 0'], errors='ignore') # Drop join-related columns
.drop_duplicates() # Remove duplicate rows
)
# Reset the index for clarity
filtered_transcripts_df.reset_index(drop=True, inplace=True)
# Display result
filtered_transcripts_df
| | barcode_id | global_x | global_y | global_z | x | y | fov | gene | transcript_id | cell_id | geometry | EntityID |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 38 | 3446.3018 | 7837.6090 | 1.0 | 388.12305 | 474.39932 | 75 | TraesCS3D02G284200 | TraesCS3D02G284200_1 | 2343479300095100291 | POINT (3446.302 7837.609) | 2343479300095100291 |
| 1 | 40 | 3444.9426 | 7839.1235 | 1.0 | 375.53894 | 488.42523 | 75 | TraesCS1A02G199600 | TraesCS1A02G199600_2 | 2343479300095100291 | POINT (3444.943 7839.123) | 2343479300095100291 |
| 2 | 70 | 3444.9240 | 7838.3220 | 1.0 | 375.36667 | 481.00000 | 75 | TraesCS7D02G246100 | TraesCS7D02G246100_1 | 2343479300095100291 | POINT (3444.924 7838.322) | 2343479300095100291 |
| 3 | 93 | 3442.4006 | 7835.5137 | 1.0 | 352.00000 | 455.00000 | 75 | TraesCS3B02G435700 | TraesCS3B02G435700_1 | 2343479300095100291 | POINT (3442.401 7835.514) | 2343479300095100291 |
| 4 | 98 | 3444.8670 | 7839.4214 | 1.0 | 374.83790 | 491.18338 | 75 | TraesCS4D02G076900 | TraesCS4D02G076900_1 | 2343479300095100291 | POINT (3444.867 7839.421) | 2343479300095100291 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 325696 | 129 | 4121.7773 | 6368.6880 | 6.0 | 971.40650 | 1758.59350 | 199 | TraesCS7A02G313100 | TraesCS7A02G313100_3 | 2343479300019100003 | POINT (4121.777 6368.688) | 2343479300019100003 |
| 325697 | 196 | 4116.4194 | 6367.1820 | 6.0 | 921.79400 | 1744.65250 | 199 | TraesCS2B02G260800 | TraesCS2B02G260800_1 | 2343479300019100003 | POINT (4116.419 6367.182) | 2343479300019100003 |
| 325698 | 196 | 4120.6953 | 6369.7456 | 6.0 | 961.38745 | 1768.38750 | 199 | TraesCS2B02G260800 | TraesCS2B02G260800_1 | 2343479300019100003 | POINT (4120.695 6369.746) | 2343479300019100003 |
| 325699 | 211 | 4115.4690 | 6368.1646 | 6.0 | 913.00000 | 1753.74580 | 199 | TraesCS2A02G114900 | TraesCS2A02G114900_1 | 2343479300019100003 | POINT (4115.469 6368.165) | 2343479300019100003 |
| 325700 | 245 | 4123.5693 | 6371.2160 | 6.0 | 988.00000 | 1782.00000 | 199 | TraesCS2A02G391100 | TraesCS2A02G391100_2 | 2343479300019100003 | POINT (4123.569 6371.216) | 2343479300019100003 |
325701 rows × 12 columns
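A quick sanity check on the spatial join, for example the fraction of detected transcripts retained and the most abundant genes among them:
# Optional check: how many transcripts survived the 'within' join, and which
# genes dominate the filtered set.
print(f"Retained {len(filtered_transcripts_df)} of {len(transcripts_df)} transcripts "
      f"({len(filtered_transcripts_df) / len(transcripts_df):.1%})")
print(filtered_transcripts_df['gene'].value_counts().head())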
First I will label the expression domains of interest
# Define custom colors for specific clusters
custom_cluster_colors = {
'0': '#a70e62',
'12': '#6ecad5',
'9': '#4915be',
'3': '#f0b932'
}
# Default color for unspecified clusters
default_color = "white"
# Define rotation parameters
angle = -22 # Degrees
radians = math.radians(angle)
cos_angle = math.cos(radians)
sin_angle = math.sin(radians)
rotation_matrix = np.array([[cos_angle, -sin_angle], [sin_angle, cos_angle]])
# Calculate the center of the filtered segmentation data for rotation
segmentation_center_x = merged_segmentation_df['Geometry'].centroid.x.mean()
segmentation_center_y = merged_segmentation_df['Geometry'].centroid.y.mean()
# Apply rotation and assign cluster colors to each Polygon
rotated_patches_list = []
for _, row in merged_segmentation_df.iterrows():
shape = row['Geometry']
cluster = str(row['clusters']) # Convert cluster to string to match custom colors dict keys
# Assign color based on cluster
color = custom_cluster_colors.get(cluster, default_color)
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
centered_coords = np.array(poly.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
rotated_poly = Polygon(rotated_coords)
patch = patches.Polygon(rotated_coords, closed=True, facecolor=color, edgecolor='#595959', linewidth=1.0)
rotated_patches_list.append(patch)
elif isinstance(shape, Polygon):
centered_coords = np.array(shape.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
rotated_poly = Polygon(rotated_coords)
patch = patches.Polygon(rotated_coords, closed=True, facecolor=color, edgecolor='#595959', linewidth=1.0)
rotated_patches_list.append(patch)
# Plot the rotated and colored polygons
fig, ax = plt.subplots(figsize=(10, 10))
# Add rotated polygons
collection = PatchCollection(rotated_patches_list, match_original=True)
ax.add_collection(collection)
# Calculate axis limits
rotated_x_coords = [coord[0] for patch in rotated_patches_list for coord in patch.get_xy()]
rotated_y_coords = [coord[1] for patch in rotated_patches_list for coord in patch.get_xy()]
x_min, x_max = min(rotated_x_coords), max(rotated_x_coords)
y_min, y_max = min(rotated_y_coords), max(rotated_y_coords)
# Set axis limits
ax.set_ylim(6650, 7825)
ax.set_xlim(3610, 4220)
ax.set_aspect('equal')
# Add a 250-micron scale bar
scale_bar_length = 250 # Microns
x_start = 3620 # Starting x-coordinate of the scale bar
y_start = 6680 # Starting y-coordinate of the scale bar
x_end = x_start + scale_bar_length # Ending x-coordinate of the scale bar
# Draw the scale bar line
ax.hlines(y_start, x_start, x_end, colors='black', linewidth=3)
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])
plt.tight_layout()
plt.savefig('enrichmentanalysis/VGN1e1_domainmap_250scalebar.png', dpi=700, bbox_inches='tight', format='png', transparent=True)
plt.show()
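If a legend for the highlighted domains is wanted, proxy patches can be built from custom_cluster_colors; a small sketch:
# Optional legend built from the custom cluster colours (add before plt.savefig).
legend_handles = [
    patches.Patch(facecolor=col, edgecolor='#595959', label=f"cluster {cl}")
    for cl, col in custom_cluster_colors.items()
]
ax.legend(handles=legend_handles, loc="upper left", bbox_to_anchor=(1.05, 1),
          borderaxespad=0, fontsize=10)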
Now I will plot enriched gene markers of interest
transcripts_in_area = filtered_transcripts_df.copy()
# Define genes and colors
genes_of_interest = {
'TraesCS4A02G256700': '#a70e62'
}
# Define rotation parameters
angle = -22 # Degrees
radians = math.radians(angle)
cos_angle = math.cos(radians)
sin_angle = math.sin(radians)
rotation_matrix = np.array([[cos_angle, -sin_angle], [sin_angle, cos_angle]])
# Calculate the center of the filtered segmentation data for rotation
segmentation_center_x = filtered_segmentation_df['Geometry'].centroid.x.mean()
segmentation_center_y = filtered_segmentation_df['Geometry'].centroid.y.mean()
# Apply rotation to each Polygon in the filtered segmentation data
rotated_patches_list = []
rotated_polygons = [] # Store rotated polygons for containment checks
rotated_centroids = [] # Store centroids of rotated polygons for bin assignment
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
centered_coords = np.array(poly.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
rotated_poly = Polygon(rotated_coords)
rotated_polygons.append(rotated_poly)
rotated_centroids.append(rotated_poly.centroid)
patch = patches.Polygon(rotated_coords, closed=True, facecolor="white", edgecolor='#595959', linewidth=1)
rotated_patches_list.append(patch)
elif isinstance(shape, Polygon):
centered_coords = np.array(shape.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
rotated_poly = Polygon(rotated_coords)
rotated_polygons.append(rotated_poly)
rotated_centroids.append(rotated_poly.centroid)
patch = patches.Polygon(rotated_coords, closed=True, facecolor="white", edgecolor='#595959', linewidth=1)
rotated_patches_list.append(patch)
# Apply rotation to transcript coordinates
rotated_transcript_data = {}
for gene, color in genes_of_interest.items():
transcripts_of_interest = transcripts_in_area[transcripts_in_area['gene'] == gene].copy()
transcript_coords = transcripts_of_interest[['global_x', 'global_y']].to_numpy()
centered_transcript_coords = transcript_coords - np.array([segmentation_center_x, segmentation_center_y])
rotated_transcript_coords = np.dot(centered_transcript_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
# Add rotated transcript coordinates for plotting
rotated_transcript_data[gene] = {
'x': rotated_transcript_coords[:, 0],
'y': rotated_transcript_coords[:, 1],
'color': color
}
# Plot the rotated segmentation and transcripts
fig, ax = plt.subplots(figsize=(10, 10))
# Add rotated segmentation polygons
collection = PatchCollection(rotated_patches_list, match_original=True)
ax.add_collection(collection)
# Plot each gene's filtered transcripts with unique colors
for gene, data in rotated_transcript_data.items():
ax.scatter(data['x'], data['y'], color=data['color'], s=2, alpha=1, label=gene)
# Set axis limits
ax.set_ylim(6650, 7825)
ax.set_xlim(3610, 4220)
ax.set_aspect('equal')
# Place the legend outside the plot
ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0, prop=dict(size=10))
# Add a 250-micron scale bar
scale_bar_length = 250 # Microns
x_start = 3620 # Starting x-coordinate of the scale bar
y_start = 6680 # Starting y-coordinate of the scale bar
x_end = x_start + scale_bar_length # Ending x-coordinate of the scale bar
# Draw the scale bar line
ax.hlines(y_start, x_start, x_end, colors='black', linewidth=3)
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])
plt.tight_layout()
plt.savefig('enrichmentanalysis/VGN1e1_KNOX5map_250scalebar.png', dpi=700, bbox_inches='tight', format='png', transparent=True)
plt.show()
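The cells below repeat the same rotate-and-plot steps for each marker, changing only the gene, colour and output path; a hypothetical helper (not used here, defaults are illustrative) could parameterise them:
# Sketch of a helper wrapping the repeated per-gene transcript maps below.
def plot_gene_transcripts(gene, color, outfile, angle=-22,
                          xlim=(3610, 4220), ylim=(6650, 7825)):
    rad = math.radians(angle)
    R = np.array([[math.cos(rad), -math.sin(rad)],
                  [math.sin(rad),  math.cos(rad)]])
    center = np.array([filtered_segmentation_df['Geometry'].centroid.x.mean(),
                       filtered_segmentation_df['Geometry'].centroid.y.mean()])
    # Rotate every cell outline about the sample centre
    cell_patches = []
    for geom in filtered_segmentation_df['Geometry']:
        polys = geom.geoms if isinstance(geom, MultiPolygon) else [geom]
        for poly in polys:
            coords = (np.array(poly.exterior.coords) - center) @ R + center
            cell_patches.append(patches.Polygon(coords, closed=True, facecolor='white',
                                                edgecolor='#595959', linewidth=1))
    # Rotate the transcript coordinates of the requested gene
    pts = transcripts_in_area.loc[transcripts_in_area['gene'] == gene,
                                  ['global_x', 'global_y']].to_numpy()
    pts = (pts - center) @ R + center
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.add_collection(PatchCollection(cell_patches, match_original=True))
    ax.scatter(pts[:, 0], pts[:, 1], color=color, s=2, label=gene)
    ax.set_xlim(xlim); ax.set_ylim(ylim); ax.set_aspect('equal')
    ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0)
    ax.hlines(ylim[0] + 30, xlim[0] + 10, xlim[0] + 260, colors='black', linewidth=3)  # 250 µm bar
    ax.set_xticks([]); ax.set_yticks([])
    plt.savefig(outfile, dpi=700, bbox_inches='tight', transparent=True)
    plt.show()
# e.g. plot_gene_transcripts('TraesCS1A02G418200', '#3CBBC8',
#                            'enrichmentanalysis/VGN1e1_NL1map_250scalebar.png')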
# Define genes and colors
genes_of_interest = {
'TraesCS1A02G418200': '#3CBBC8'
}
# Define rotation parameters
angle = -22 # Degrees
radians = math.radians(angle)
cos_angle = math.cos(radians)
sin_angle = math.sin(radians)
rotation_matrix = np.array([[cos_angle, -sin_angle], [sin_angle, cos_angle]])
# Calculate the center of the filtered segmentation data for rotation
segmentation_center_x = filtered_segmentation_df['Geometry'].centroid.x.mean()
segmentation_center_y = filtered_segmentation_df['Geometry'].centroid.y.mean()
# Apply rotation to each Polygon in the filtered segmentation data
rotated_patches_list = []
rotated_polygons = [] # Store rotated polygons for containment checks
rotated_centroids = [] # Store centroids of rotated polygons for bin assignment
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
centered_coords = np.array(poly.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
rotated_poly = Polygon(rotated_coords)
rotated_polygons.append(rotated_poly)
rotated_centroids.append(rotated_poly.centroid)
patch = patches.Polygon(rotated_coords, closed=True, facecolor="white", edgecolor='#595959', linewidth=1)
rotated_patches_list.append(patch)
elif isinstance(shape, Polygon):
centered_coords = np.array(shape.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
rotated_poly = Polygon(rotated_coords)
rotated_polygons.append(rotated_poly)
rotated_centroids.append(rotated_poly.centroid)
patch = patches.Polygon(rotated_coords, closed=True, facecolor="white", edgecolor='#595959', linewidth=1)
rotated_patches_list.append(patch)
# Apply rotation to transcript coordinates
rotated_transcript_data = {}
for gene, color in genes_of_interest.items():
transcripts_of_interest = transcripts_in_area[transcripts_in_area['gene'] == gene].copy()
transcript_coords = transcripts_of_interest[['global_x', 'global_y']].to_numpy()
centered_transcript_coords = transcript_coords - np.array([segmentation_center_x, segmentation_center_y])
rotated_transcript_coords = np.dot(centered_transcript_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
# Add rotated transcript coordinates for plotting
rotated_transcript_data[gene] = {
'x': rotated_transcript_coords[:, 0],
'y': rotated_transcript_coords[:, 1],
'color': color
}
# Plot the rotated segmentation and transcripts
fig, ax = plt.subplots(figsize=(10, 10))
# Add rotated segmentation polygons
collection = PatchCollection(rotated_patches_list, match_original=True)
ax.add_collection(collection)
# Plot each gene's filtered transcripts with unique colors
for gene, data in rotated_transcript_data.items():
ax.scatter(data['x'], data['y'], color=data['color'], s=2, alpha=1, label=gene)
# Set axis limits
ax.set_ylim(6650, 7825)
ax.set_xlim(3610, 4220)
ax.set_aspect('equal')
# Place the legend outside the plot
ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0, prop=dict(size=10))
# Add a 250-micron scale bar
scale_bar_length = 250 # Microns
x_start = 3620 # Starting x-coordinate of the scale bar
y_start = 6680 # Starting y-coordinate of the scale bar
x_end = x_start + scale_bar_length # Ending x-coordinate of the scale bar
# Draw the scale bar line
ax.hlines(y_start, x_start, x_end, colors='black', linewidth=3)
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])
plt.tight_layout()
plt.savefig('enrichmentanalysis/VGN1e1_NL1map_250scalebar.png', dpi=700, bbox_inches='tight', format='png', transparent=True)
plt.show()
# Define genes and colors
genes_of_interest = {
'TraesCS1B02G042200': '#f0b932'
}
# Define rotation parameters
angle = -22 # Degrees
radians = math.radians(angle)
cos_angle = math.cos(radians)
sin_angle = math.sin(radians)
rotation_matrix = np.array([[cos_angle, -sin_angle], [sin_angle, cos_angle]])
# Calculate the center of the filtered segmentation data for rotation
segmentation_center_x = filtered_segmentation_df['Geometry'].centroid.x.mean()
segmentation_center_y = filtered_segmentation_df['Geometry'].centroid.y.mean()
# Apply rotation to each Polygon in the filtered segmentation data
rotated_patches_list = []
rotated_polygons = [] # Store rotated polygons for containment checks
rotated_centroids = [] # Store centroids of rotated polygons for bin assignment
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
centered_coords = np.array(poly.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
rotated_poly = Polygon(rotated_coords)
rotated_polygons.append(rotated_poly)
rotated_centroids.append(rotated_poly.centroid)
patch = patches.Polygon(rotated_coords, closed=True, facecolor="white", edgecolor='#595959', linewidth=1)
rotated_patches_list.append(patch)
elif isinstance(shape, Polygon):
centered_coords = np.array(shape.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
rotated_poly = Polygon(rotated_coords)
rotated_polygons.append(rotated_poly)
rotated_centroids.append(rotated_poly.centroid)
patch = patches.Polygon(rotated_coords, closed=True, facecolor="white", edgecolor='#595959', linewidth=1)
rotated_patches_list.append(patch)
# Apply rotation to transcript coordinates
rotated_transcript_data = {}
for gene, color in genes_of_interest.items():
transcripts_of_interest = transcripts_in_area[transcripts_in_area['gene'] == gene].copy()
transcript_coords = transcripts_of_interest[['global_x', 'global_y']].to_numpy()
centered_transcript_coords = transcript_coords - np.array([segmentation_center_x, segmentation_center_y])
rotated_transcript_coords = np.dot(centered_transcript_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
# Add rotated transcript coordinates for plotting
rotated_transcript_data[gene] = {
'x': rotated_transcript_coords[:, 0],
'y': rotated_transcript_coords[:, 1],
'color': color
}
# Plot the rotated segmentation and transcripts
fig, ax = plt.subplots(figsize=(10, 10))
# Add rotated segmentation polygons
collection = PatchCollection(rotated_patches_list, match_original=True)
ax.add_collection(collection)
# Plot each gene's filtered transcripts with unique colors
for gene, data in rotated_transcript_data.items():
ax.scatter(data['x'], data['y'], color=data['color'], s=2, alpha=1, label=gene)
# Set axis limits
ax.set_ylim(6650, 7825)
ax.set_xlim(3610, 4220)
ax.set_aspect('equal')
# Place the legend outside the plot
ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0, prop=dict(size=10))
# Add a 250-micron scale bar
scale_bar_length = 250 # Microns
x_start = 3620 # Starting x-coordinate of the scale bar
y_start = 6680 # Starting y-coordinate of the scale bar
x_end = x_start + scale_bar_length # Ending x-coordinate of the scale bar
# Draw the scale bar line
ax.hlines(y_start, x_start, x_end, colors='black', linewidth=3)
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])
plt.tight_layout()
plt.savefig('enrichmentanalysis/VGN1e1_MT2map_250scalebar.svg', dpi=700, bbox_inches='tight', format='svg', transparent=True)
plt.show()
# Define genes and colors
genes_of_interest = {
'TraesCS1B02G042200': '#f0b932'
}
# Define rotation parameters
angle = -22 # Degrees
radians = math.radians(angle)
cos_angle = math.cos(radians)
sin_angle = math.sin(radians)
rotation_matrix = np.array([[cos_angle, -sin_angle], [sin_angle, cos_angle]])
# Calculate the center of the filtered segmentation data for rotation
segmentation_center_x = filtered_segmentation_df['Geometry'].centroid.x.mean()
segmentation_center_y = filtered_segmentation_df['Geometry'].centroid.y.mean()
# Apply rotation to each Polygon in the filtered segmentation data
rotated_patches_list = []
rotated_polygons = [] # Store rotated polygons for containment checks
rotated_centroids = [] # Store centroids of rotated polygons for bin assignment
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
centered_coords = np.array(poly.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
rotated_poly = Polygon(rotated_coords)
rotated_polygons.append(rotated_poly)
rotated_centroids.append(rotated_poly.centroid)
patch = patches.Polygon(rotated_coords, closed=True, facecolor="white", edgecolor='#595959', linewidth=1)
rotated_patches_list.append(patch)
elif isinstance(shape, Polygon):
centered_coords = np.array(shape.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
rotated_poly = Polygon(rotated_coords)
rotated_polygons.append(rotated_poly)
rotated_centroids.append(rotated_poly.centroid)
patch = patches.Polygon(rotated_coords, closed=True, facecolor="white", edgecolor='#595959', linewidth=1)
rotated_patches_list.append(patch)
# Apply rotation to transcript coordinates
rotated_transcript_data = {}
for gene, color in genes_of_interest.items():
transcripts_of_interest = transcripts_in_area[transcripts_in_area['gene'] == gene].copy()
transcript_coords = transcripts_of_interest[['global_x', 'global_y']].to_numpy()
centered_transcript_coords = transcript_coords - np.array([segmentation_center_x, segmentation_center_y])
rotated_transcript_coords = np.dot(centered_transcript_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
# Add rotated transcript coordinates for plotting
rotated_transcript_data[gene] = {
'x': rotated_transcript_coords[:, 0],
'y': rotated_transcript_coords[:, 1],
'color': color
}
# Plot the rotated segmentation and transcripts
fig, ax = plt.subplots(figsize=(10, 10))
# Add rotated segmentation polygons
collection = PatchCollection(rotated_patches_list, match_original=True)
ax.add_collection(collection)
# Plot each gene's filtered transcripts with unique colors
for gene, data in rotated_transcript_data.items():
ax.scatter(data['x'], data['y'], color=data['color'], s=2, alpha=1, label=gene)
# Set axis limits
ax.set_ylim(6650, 7825)
ax.set_xlim(3610, 4220)
ax.set_aspect('equal')
# Place the legend outside the plot
ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0, prop=dict(size=10))
# Add a 250-micron scale bar
scale_bar_length = 250 # Microns
x_start = 3620 # Starting x-coordinate of the scale bar
y_start = 6680 # Starting y-coordinate of the scale bar
x_end = x_start + scale_bar_length # Ending x-coordinate of the scale bar
# Draw the scale bar line
ax.hlines(y_start, x_start, x_end, colors='black', linewidth=3)
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])
plt.tight_layout()
plt.savefig('enrichmentanalysis/VGN1e1_MT2map_250scalebar.png', dpi=700, bbox_inches='tight', format='png')
plt.show()
# Define genes and colors
genes_of_interest = {
'TraesCS7B02G413900': '#4915be'
}
# Define rotation parameters
angle = -22 # Degrees
radians = math.radians(angle)
cos_angle = math.cos(radians)
sin_angle = math.sin(radians)
rotation_matrix = np.array([[cos_angle, -sin_angle], [sin_angle, cos_angle]])
# Calculate the center of the filtered segmentation data for rotation
segmentation_center_x = filtered_segmentation_df['Geometry'].centroid.x.mean()
segmentation_center_y = filtered_segmentation_df['Geometry'].centroid.y.mean()
# Apply rotation to each Polygon in the filtered segmentation data
rotated_patches_list = []
rotated_polygons = [] # Store rotated polygons for containment checks
rotated_centroids = [] # Store centroids of rotated polygons for bin assignment
for _, row in filtered_segmentation_df.iterrows():
shape = row['Geometry']
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
centered_coords = np.array(poly.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
rotated_poly = Polygon(rotated_coords)
rotated_polygons.append(rotated_poly)
rotated_centroids.append(rotated_poly.centroid)
patch = patches.Polygon(rotated_coords, closed=True, facecolor="white", edgecolor='#595959', linewidth=1)
rotated_patches_list.append(patch)
elif isinstance(shape, Polygon):
centered_coords = np.array(shape.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
rotated_poly = Polygon(rotated_coords)
rotated_polygons.append(rotated_poly)
rotated_centroids.append(rotated_poly.centroid)
patch = patches.Polygon(rotated_coords, closed=True, facecolor="white", edgecolor='#595959', linewidth=1)
rotated_patches_list.append(patch)
# Apply rotation to transcript coordinates
rotated_transcript_data = {}
for gene, color in genes_of_interest.items():
transcripts_of_interest = transcripts_in_area[transcripts_in_area['gene'] == gene].copy()
transcript_coords = transcripts_of_interest[['global_x', 'global_y']].to_numpy()
centered_transcript_coords = transcript_coords - np.array([segmentation_center_x, segmentation_center_y])
rotated_transcript_coords = np.dot(centered_transcript_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
# Add rotated transcript coordinates for plotting
rotated_transcript_data[gene] = {
'x': rotated_transcript_coords[:, 0],
'y': rotated_transcript_coords[:, 1],
'color': color
}
# Plot the rotated segmentation and transcripts
fig, ax = plt.subplots(figsize=(10, 10))
# Add rotated segmentation polygons
collection = PatchCollection(rotated_patches_list, match_original=True)
ax.add_collection(collection)
# Plot each gene's filtered transcripts with unique colors
for gene, data in rotated_transcript_data.items():
ax.scatter(data['x'], data['y'], color=data['color'], s=2, alpha=1, label=gene)
# Set axis limits
ax.set_ylim(6650, 7825)
ax.set_xlim(3610, 4220)
ax.set_aspect('equal')
# Place the legend outside the plot
ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0, prop=dict(size=10))
# Add a 250-micron scale bar
scale_bar_length = 250 # Microns
x_start = 3620 # Starting x-coordinate of the scale bar
y_start = 6680 # Starting y-coordinate of the scale bar
x_end = x_start + scale_bar_length # Ending x-coordinate of the scale bar
# Draw the scale bar line
ax.hlines(y_start, x_start, x_end, colors='black', linewidth=3)
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])
plt.tight_layout()
plt.savefig('enrichmentanalysis/VGN1e1_MNDmap_250scalebar.png', dpi=700, bbox_inches='tight', format='png', transparent=True)
plt.show()